Package translate :: Package filters :: Module checks
[hide private]
[frames] | no frames]

Source Code for Module translate.filters.checks

   1  #!/usr/bin/env python 
   2  # -*- coding: utf-8 -*- 
   3  # 
   4  # Copyright 2004-2008 Zuza Software Foundation 
   5  # 
   6  # This file is part of translate. 
   7  # 
   8  # translate is free software; you can redistribute it and/or modify 
   9  # it under the terms of the GNU General Public License as published by 
  10  # the Free Software Foundation; either version 2 of the License, or 
  11  # (at your option) any later version. 
  12  # 
  13  # translate is distributed in the hope that it will be useful, 
  14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
  15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  16  # GNU General Public License for more details. 
  17  # 
  18  # You should have received a copy of the GNU General Public License 
  19  # along with translate; if not, write to the Free Software 
  20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
  21   
  22  """This is a set of validation checks that can be performed on translation 
  23  units. 
  24   
  25  Derivatives of UnitChecker (like StandardUnitChecker) check translation units, 
  26  and derivatives of TranslationChecker (like StandardChecker) check 
  27  (source, target) translation pairs. 
  28   
  29  When adding a new test here, please document and explain the behaviour on the 
  30  U{wiki <http://translate.sourceforge.net/wiki/toolkit/pofilter_tests>}. 
  31  """ 
  32   
  33  from translate.filters import helpers 
  34  from translate.filters import decoration 
  35  from translate.filters import prefilters 
  36  from translate.filters import spelling 
  37  from translate.lang import factory 
  38  from translate.lang import data 
  39  # The import of xliff could fail if the user doesn't have lxml installed. For 
  40  # now we try to continue gracefully to help users who aren't interested in 
  41  # support for XLIFF or other XML formats. 
  42  try: 
  43      from translate.storage import xliff 
  44  except ImportError, e: 
  45      xliff = None 
  46  # The import of xliff can fail silently in the absence of lxml if another
  47  # module already tried to import it unsuccessfully, so let's make 100% sure:
  48  if not hasattr(xliff, "xliffunit"): 
  49      xliff = None 
  50  import re 
  51   
  52  # These are some regular expressions that are compiled for use in some tests 
  53   
  54  # printf syntax based on http://en.wikipedia.org/wiki/Printf which doens't 
  55  # cover everything we leave \w instead of specifying the exact letters as 
  56  # this should capture printf types defined in other platforms. 
  57  # extended to support Python named format specifiers 
  58  printf_pat = re.compile('%((?:(?P<ord>\d+)\$|\((?P<key>\w+)\))?(?P<fullvar>[+#-]*(?:\d+)?(?:\.\d+)?(hh\|h\|l\|ll)?(?P<type>[\w%])))') 
  59   
  60  # The name of the XML tag 
  61  tagname_re = re.compile("<[\s]*([\w\/]*)") 
  62   
  63  # We allow escaped quotes, probably for old escaping style of OOo helpcontent 
  64  #TODO: remove escaped strings once usage is audited 
  65  property_re = re.compile(" (\w*)=((\\\\?\".*?\\\\?\")|(\\\\?'.*?\\\\?'))") 
  66   
  67  # The whole tag 
  68  tag_re = re.compile("<[^>]+>") 
  69   
  70  gconf_attribute_re = re.compile('"[a-z_]+?"') 
  71   
  72   
def tagname(string):
    """Return the tag name found at the start of an XML/HTML tag string.

    The name is whatever tagname_re captures in its first group; string is
    expected to begin with "<" (tagname_re is applied with match()), otherwise
    the failed match raises AttributeError.
    """
    found = tagname_re.match(string)
    return found.group(1)
76 77
def intuplelist(pair, list):
    """Look up the (tag, attribute, value) triple ``pair`` in ``list``.

    Entries of ``list`` are triples too; a None in their first or third
    position acts as a wildcard.  The third position is only looked at once
    the first two have matched.

    @param pair: the (a, b, c) triple to look for
    @param list: the triples to search, in order
    @return: the first matching entry of list, or pair itself when nothing
        matches or when pair is a bare tag name, i.e. (a, None, None)
    """
    tag, attribute, value = pair
    if (attribute, value) == (None, None):
        # Only a tag name was given; there is nothing to look up.
        return pair
    for candidate in list:
        cand_tag, cand_attribute, cand_value = candidate
        if cand_tag not in (tag, None) or cand_attribute != attribute:
            continue
        if cand_value in (None, value):
            return candidate
    return pair
92 93
def tagproperties(strings, ignore):
    """Collect the tag names and attributes of the given XML/HTML tags.

    @param strings: the tag strings to examine
    @param ignore: (tagname, propertyname, propertyvalue) combinations that
        must be left out; matched literally or through intuplelist()
    @return: a list holding (tagname, None, None) for every tag, followed by
        a (tagname, propertyname, propertyvalue) entry per retained attribute
    """
    collected = []
    for string in strings:
        tag = tagname(string)
        collected.append((tag, None, None))
        # Each findall() hit is (name, quoted value, dq-variant, sq-variant);
        # only the first two are used.
        for name, quoted, _double, _single in property_re.findall(string):
            # Strip the quotes:
            entry = (tag, name, quoted[1:-1])
            if entry in ignore or intuplelist(entry, ignore) != entry:
                # NOTE(review): an ignorable attribute stops the scan of this
                # tag, so later attributes of the same tag are dropped too.
                # Kept as is -- confirm whether that is intended.
                break
            collected.append(entry)
    return collected
116 117
class FilterFailure(Exception):
    """This exception signals that a Filter didn't pass, and gives an
    explanation or a comment"""

    def __init__(self, messages):
        """Build the failure from one message or a list of messages.

        @param messages: a unicode string, or a list of unicode strings that
            are joined with ", " into the single exception argument
        """
        if not isinstance(messages, list):
            messages = [messages]
        # Assumption: all of same type, so only the first message is checked.
        # An empty list is tolerated (it yields an empty message) rather than
        # failing with an IndexError inside the assertion.
        assert not messages or isinstance(messages[0], unicode)
        joined = u", ".join(messages)
        Exception.__init__(self, joined)
        # Python 2.3 doesn't have .args
        if not hasattr(self, "args"):
            # Must be a tuple: consumers read the message as e.args[0], which
            # on a bare string would only give its first character.
            self.args = (joined,)
131 132
class SeriousFilterFailure(FilterFailure):
    """A FilterFailure for translations that might break an application.

    Raised instead of a plain FilterFailure when the bad translation is
    potentially harmful (so the string will be marked fuzzy).
    """
# Default tag/attribute tables used by CheckerConfig when the caller supplies
# no ignoretags / canchangetags of its own.
#
# Each entry is a (tag, attribute, value) tuple that specifies a certain
# attribute which can be changed/ignored if it exists inside tag. The third
# element, when given, indicates a property value that can be ignored if
# present (like defaults, for example).
# If a certain item is None, it is relevant for all values of the property/tag
# that is specified as None. A non-None "value" indicates that the value of
# the attribute must be taken into account.
common_ignoretags = [(None, "xml-lang", None)]
common_canchangetags = [("img", "alt", None), (None, "title", None)]
# Actually the title tag is allowed on many tags in HTML (but probably not all)
class CheckerConfig(object):
    """Bundle of settings that steers how a checker runs its tests."""

    def __init__(self, targetlanguage=None, accelmarkers=None, varmatches=None,
                 notranslatewords=None, musttranslatewords=None,
                 validchars=None, punctuation=None, endpunctuation=None,
                 ignoretags=None, canchangetags=None, criticaltests=None,
                 credit_sources=None):
        """Store the given settings, substituting defaults for None.

        List-valued options default to an empty list; punctuation and
        endpunctuation default to the target language's punctuation and
        sentenceend; ignoretags and canchangetags default to the module-level
        common_ignoretags and common_canchangetags.
        """
        # Options that are plain lists
        self.accelmarkers = self._init_list(accelmarkers)
        self.varmatches = self._init_list(varmatches)
        self.criticaltests = self._init_list(criticaltests)
        self.credit_sources = self._init_list(credit_sources)

        # Language objects: self.lang (target) is set by
        # updatetargetlanguage(); the source language is always 'en'.
        self.targetlanguage = targetlanguage
        self.updatetargetlanguage(targetlanguage)
        self.sourcelang = factory.getlanguage('en')

        # Options with language- or module-level defaults
        user_punctuation = data.normalized_unicode(punctuation)
        self.punctuation = self._init_default(user_punctuation,
                                              self.lang.punctuation)
        user_endpunctuation = data.normalized_unicode(endpunctuation)
        self.endpunctuation = self._init_default(user_endpunctuation,
                                                 self.lang.sentenceend)
        self.ignoretags = self._init_default(ignoretags, common_ignoretags)
        self.canchangetags = self._init_default(canchangetags,
                                                common_canchangetags)

        # Word lists are kept as dictionaries keyed on the normalised word.
        # TODO: allow user configuration of untranslatable words
        notranslate = [data.normalized_unicode(word)
                       for word in self._init_list(notranslatewords)]
        self.notranslatewords = dict.fromkeys(notranslate)
        musttranslate = [data.normalized_unicode(word)
                         for word in self._init_list(musttranslatewords)]
        self.musttranslatewords = dict.fromkeys(musttranslate)

        validchars = data.normalized_unicode(validchars)
        self.validcharsmap = {}
        self.updatevalidchars(validchars)

    def _init_list(self, list):
        """Return ``list`` itself, or a new empty list when it is None.

        @type list: List
        @param list: None or a list parameter
        @rtype: List
        """
        if list is not None:
            return list
        return []

    def _init_default(self, param, default):
        """Return ``param`` unless it is None, in which case ``default``.

        @param param: the user supplied parameter value
        @param default: the value to use when param is not specified
        @return: param, or default if param is None
        """
        if param is not None:
            return param
        return default

    def update(self, otherconfig):
        """Merge the settings of otherconfig into this config object.

        Lists, dictionaries and punctuation strings are extended; the target
        language is only replaced when otherconfig has one; ignoretags,
        canchangetags and credit_sources are overwritten.
        """
        self.targetlanguage = otherconfig.targetlanguage or self.targetlanguage
        self.updatetargetlanguage(self.targetlanguage)
        new_markers = [marker for marker in otherconfig.accelmarkers
                       if not marker in self.accelmarkers]
        self.accelmarkers.extend(new_markers)
        self.varmatches.extend(otherconfig.varmatches)
        self.notranslatewords.update(otherconfig.notranslatewords)
        self.musttranslatewords.update(otherconfig.musttranslatewords)
        self.validcharsmap.update(otherconfig.validcharsmap)
        self.punctuation += otherconfig.punctuation
        self.endpunctuation += otherconfig.endpunctuation
        #TODO: consider also updating in the following cases:
        self.ignoretags = otherconfig.ignoretags
        self.canchangetags = otherconfig.canchangetags
        self.criticaltests.extend(otherconfig.criticaltests)
        self.credit_sources = otherconfig.credit_sources

    def updatevalidchars(self, validchars):
        """Add validchars to the map that eliminates valid characters.

        Every character is stored as ``ord(char) -> None`` in
        self.validcharsmap.  Returns True when validchars is None (nothing to
        do) and None otherwise.
        """
        if validchars is None:
            return True
        ordinals = [ord(char) for char in data.normalized_unicode(validchars)]
        self.validcharsmap.update(dict.fromkeys(ordinals))

    def updatetargetlanguage(self, langcode):
        """Point self.lang at the language object for langcode."""
        self.lang = factory.getlanguage(langcode)
232 233
def cache_results(f):
    """Wrap the one-argument method ``f`` so its results are memoised.

    Results are stored in the instance's ``results_cache`` dictionary under
    the key ``(f.__name__, argument)``, so the instance must provide that
    attribute and the argument must be hashable.
    """

    def cached_f(self, param1):
        cache = self.results_cache
        key = (f.__name__, param1)
        try:
            return cache[key]
        except KeyError:
            pass
        result = f(self, param1)
        cache[key] = result
        return result

    return cached_f
248 -class UnitChecker(object):
249 """Parent Checker class which does the checking based on functions available 250 in derived classes.""" 251 preconditions = {} 252
253 - def __init__(self, checkerconfig=None, excludefilters=None, 254 limitfilters=None, errorhandler=None):
255 self.errorhandler = errorhandler 256 if checkerconfig is None: 257 self.setconfig(CheckerConfig()) 258 else: 259 self.setconfig(checkerconfig) 260 # exclude functions defined in UnitChecker from being treated as tests. 261 self.helperfunctions = {} 262 for functionname in dir(UnitChecker): 263 function = getattr(self, functionname) 264 if callable(function): 265 self.helperfunctions[functionname] = function 266 self.defaultfilters = self.getfilters(excludefilters, limitfilters) 267 self.results_cache = {}
268
269 - def getfilters(self, excludefilters=None, limitfilters=None):
270 """returns dictionary of available filters, including/excluding those in 271 the given lists""" 272 filters = {} 273 if limitfilters is None: 274 # use everything available unless instructed 275 limitfilters = dir(self) 276 if excludefilters is None: 277 excludefilters = {} 278 for functionname in limitfilters: 279 if functionname in excludefilters: 280 continue 281 if functionname in self.helperfunctions: 282 continue 283 if functionname == "errorhandler": 284 continue 285 filterfunction = getattr(self, functionname, None) 286 if not callable(filterfunction): 287 continue 288 filters[functionname] = filterfunction 289 return filters
290
291 - def setconfig(self, config):
292 """sets the accelerator list""" 293 self.config = config 294 self.accfilters = [prefilters.filteraccelerators(accelmarker) for accelmarker in self.config.accelmarkers] 295 self.varfilters = [prefilters.filtervariables(startmatch, endmatch, prefilters.varname) 296 for startmatch, endmatch in self.config.varmatches] 297 self.removevarfilter = [prefilters.filtervariables(startmatch, endmatch, 298 prefilters.varnone) 299 for startmatch, endmatch in self.config.varmatches]
300
301 - def setsuggestionstore(self, store):
302 """Sets the filename that a checker should use for evaluating 303 suggestions.""" 304 self.suggestion_store = store 305 if self.suggestion_store: 306 self.suggestion_store.require_index()
307
308 - def filtervariables(self, str1):
309 """filter out variables from str1""" 310 return helpers.multifilter(str1, self.varfilters)
311 filtervariables = cache_results(filtervariables) 312
313 - def removevariables(self, str1):
314 """remove variables from str1""" 315 return helpers.multifilter(str1, self.removevarfilter)
316 removevariables = cache_results(removevariables) 317
318 - def filteraccelerators(self, str1):
319 """filter out accelerators from str1""" 320 return helpers.multifilter(str1, self.accfilters, None)
321 filteraccelerators = cache_results(filteraccelerators) 322
323 - def filteraccelerators_by_list(self, str1, acceptlist=None):
324 """filter out accelerators from str1""" 325 return helpers.multifilter(str1, self.accfilters, acceptlist)
326
327 - def filterwordswithpunctuation(self, str1):
328 """replaces words with punctuation with their unpunctuated 329 equivalents""" 330 return prefilters.filterwordswithpunctuation(str1)
331 filterwordswithpunctuation = cache_results(filterwordswithpunctuation) 332
333 - def filterxml(self, str1):
334 """filter out XML from the string so only text remains""" 335 return tag_re.sub("", str1)
336 filterxml = cache_results(filterxml) 337
338 - def run_test(self, test, unit):
339 """Runs the given test on the given unit. 340 341 Note that this can raise a FilterFailure as part of normal operation""" 342 return test(unit)
343
344 - def run_filters(self, unit):
345 """run all the tests in this suite, return failures as testname, 346 message_or_exception""" 347 self.results_cache = {} 348 failures = {} 349 ignores = self.config.lang.ignoretests[:] 350 functionnames = self.defaultfilters.keys() 351 priorityfunctionnames = self.preconditions.keys() 352 otherfunctionnames = filter(lambda functionname: functionname not in self.preconditions, functionnames) 353 for functionname in priorityfunctionnames + otherfunctionnames: 354 if functionname in ignores: 355 continue 356 filterfunction = getattr(self, functionname, None) 357 # this filterfunction may only be defined on another checker if 358 # using TeeChecker 359 if filterfunction is None: 360 continue 361 filtermessage = filterfunction.__doc__ 362 try: 363 filterresult = self.run_test(filterfunction, unit) 364 except FilterFailure, e: 365 filterresult = False 366 filtermessage = e.args[0] 367 except Exception, e: 368 if self.errorhandler is None: 369 raise ValueError("error in filter %s: %r, %r, %s" % \ 370 (functionname, unit.source, unit.target, e)) 371 else: 372 filterresult = self.errorhandler(functionname, unit.source, 373 unit.target, e) 374 if not filterresult: 375 # we test some preconditions that aren't actually a cause for 376 # failure 377 if functionname in self.defaultfilters: 378 failures[functionname] = filtermessage 379 if functionname in self.preconditions: 380 for ignoredfunctionname in self.preconditions[functionname]: 381 ignores.append(ignoredfunctionname) 382 self.results_cache = {} 383 return failures
384 385
386 -class TranslationChecker(UnitChecker):
387 """A checker that passes source and target strings to the checks, not the 388 whole unit. 389 390 This provides some speedup and simplifies testing.""" 391
392 - def __init__(self, checkerconfig=None, excludefilters=None, 393 limitfilters=None, errorhandler=None):
394 super(TranslationChecker, self).__init__(checkerconfig, excludefilters, 395 limitfilters, errorhandler)
396
397 - def run_test(self, test, unit):
398 """Runs the given test on the given unit. 399 400 Note that this can raise a FilterFailure as part of normal operation.""" 401 if self.hasplural: 402 filtermessages = [] 403 filterresult = True 404 for pluralform in unit.target.strings: 405 try: 406 if not test(self.str1, unicode(pluralform)): 407 filterresult = False 408 except FilterFailure, e: 409 filterresult = False 410 filtermessages.append(unicode(e.args)) 411 if not filterresult and filtermessages: 412 raise FilterFailure(filtermessages) 413 else: 414 return filterresult 415 else: 416 return test(self.str1, self.str2)
417
418 - def run_filters(self, unit):
419 """Do some optimisation by caching some data of the unit for the benefit 420 of run_test().""" 421 self.str1 = data.normalized_unicode(unit.source) or u"" 422 self.str2 = data.normalized_unicode(unit.target) or u"" 423 self.hasplural = unit.hasplural() 424 self.locations = unit.getlocations() 425 return super(TranslationChecker, self).run_filters(unit)
426 427
class TeeChecker:
    """A Checker that controls multiple checkers."""

    def __init__(self, checkerconfig=None, excludefilters=None,
                 limitfilters=None, checkerclasses=None, errorhandler=None,
                 languagecode=None):
        """Instantiate one checker per class in checkerclasses.

        checkerclasses defaults to [StandardChecker].  When languagecode is
        given, every checker's config is switched to that target language.
        The language-specific checker of the first checker's language, if
        any, is added to the set as well.
        """
        self.limitfilters = limitfilters
        if checkerclasses is None:
            checkerclasses = [StandardChecker]
        self.checkers = [
            checkerclass(checkerconfig=checkerconfig,
                         excludefilters=excludefilters,
                         limitfilters=limitfilters,
                         errorhandler=errorhandler)
            for checkerclass in checkerclasses]
        if languagecode:
            for checker in self.checkers:
                checker.config.updatetargetlanguage(languagecode)
            # Let's hook up the language specific checker
            lang_checker = self.checkers[0].config.lang.checker
            if lang_checker:
                self.checkers.append(lang_checker)

        self.combinedfilters = self.getfilters(excludefilters, limitfilters)
        self.config = checkerconfig or self.checkers[0].config

    def getfilters(self, excludefilters=None, limitfilters=None):
        """Return (and store in self.combinedfilters) the union of the
        filters of all checkers, including/excluding those in the given lists.

        A warning is printed to stderr for every name in limitfilters that no
        checker provides.
        """
        if excludefilters is None:
            excludefilters = {}
        combined = {}
        for checker in self.checkers:
            combined.update(checker.getfilters(excludefilters, limitfilters))
        self.combinedfilters = combined
        # TODO: move this somewhere more sensible (a checkfilters method?)
        if limitfilters is not None:
            for filtername in limitfilters:
                if filtername in self.combinedfilters:
                    continue
                import sys
                print >> sys.stderr, "warning: could not find filter %s" % filtername
        return self.combinedfilters

    def run_filters(self, unit):
        """Run the tests of every checker on unit and return the merged
        failures dictionary."""
        failures = {}
        for checker in self.checkers:
            checker_failures = checker.run_filters(unit)
            failures.update(checker_failures)
        return failures

    def setsuggestionstore(self, store):
        """Hand store to every checker as its suggestion store."""
        for checker in self.checkers:
            checker.setsuggestionstore(store)
479 480
481 -class StandardChecker(TranslationChecker):
482 """The basic test suite for source -> target translations.""" 483
484 - def untranslated(self, str1, str2):
485 """checks whether a string has been translated at all""" 486 str2 = prefilters.removekdecomments(str2) 487 return not (len(str1.strip()) > 0 and len(str2) == 0)
488
489 - def unchanged(self, str1, str2):
490 """checks whether a translation is basically identical to the original 491 string""" 492 str1 = self.filteraccelerators(self.removevariables(str1)).strip() 493 str2 = self.filteraccelerators(self.removevariables(str2)).strip() 494 if len(str1) < 2: 495 return True 496 # If the whole string is upperase, or nothing in the string can go 497 # towards uppercase, let's assume there is nothing translatable 498 # TODO: reconsider 499 if (str1.isupper() or str1.upper() == str1) and str1 == str2: 500 return True 501 if self.config.notranslatewords: 502 words1 = str1.split() 503 if len(words1) == 1 and [word for word in words1 if word in self.config.notranslatewords]: 504 #currently equivalent to: 505 # if len(words1) == 1 and words1[0] in self.config.notranslatewords: 506 #why do we only test for one notranslate word? 507 return True 508 # we could also check for things like str1.isnumeric(), but the test 509 # above (str1.upper() == str1) makes this unnecessary 510 if str1.lower() == str2.lower(): 511 raise FilterFailure(u"please translate") 512 return True
513
514 - def blank(self, str1, str2):
515 """checks whether a translation only contains spaces""" 516 len1 = len(str1.strip()) 517 len2 = len(str2.strip()) 518 return not (len1 > 0 and len(str2) != 0 and len2 == 0)
519
520 - def short(self, str1, str2):
521 """checks whether a translation is much shorter than the original 522 string""" 523 len1 = len(str1.strip()) 524 len2 = len(str2.strip()) 525 return not ((len1 > 0) and (0 < len2 < (len1 * 0.1)) or ((len1 > 1) and (len2 == 1)))
526
527 - def long(self, str1, str2):
528 """checks whether a translation is much longer than the original 529 string""" 530 len1 = len(str1.strip()) 531 len2 = len(str2.strip()) 532 return not ((len1 > 0) and (0 < len1 < (len2 * 0.1)) or ((len1 == 1) and (len2 > 1)))
533
534 - def escapes(self, str1, str2):
535 """checks whether escaping is consistent between the two strings""" 536 if not helpers.countsmatch(str1, str2, (u"\\", u"\\\\")): 537 escapes1 = u", ".join([u"'%s'" % word for word in str1.split() if u"\\" in word]) 538 escapes2 = u", ".join([u"'%s'" % word for word in str2.split() if u"\\" in word]) 539 raise SeriousFilterFailure(u"escapes in original (%s) don't match escapes in translation (%s)" % (escapes1, escapes2)) 540 else: 541 return True
542
543 - def newlines(self, str1, str2):
544 """checks whether newlines are consistent between the two strings""" 545 if not helpers.countsmatch(str1, str2, (u"\n", u"\r")): 546 raise FilterFailure(u"line endings in original don't match line endings in translation") 547 else: 548 return True
549
550 - def tabs(self, str1, str2):
551 """checks whether tabs are consistent between the two strings""" 552 if not helpers.countmatch(str1, str2, "\t"): 553 raise SeriousFilterFailure(u"tabs in original don't match tabs in translation") 554 else: 555 return True
556
557 - def singlequoting(self, str1, str2):
558 """checks whether singlequoting is consistent between the two strings""" 559 str1 = self.filterwordswithpunctuation(self.filteraccelerators(self.filtervariables(str1))) 560 str1 = self.config.lang.punctranslate(str1) 561 str2 = self.filterwordswithpunctuation(self.filteraccelerators(self.filtervariables(str2))) 562 return helpers.countsmatch(str1, str2, (u"'", u"''", u"\\'"))
563
564 - def doublequoting(self, str1, str2):
565 """checks whether doublequoting is consistent between the two strings""" 566 str1 = self.filteraccelerators(self.filtervariables(str1)) 567 str1 = self.filterxml(str1) 568 str1 = self.config.lang.punctranslate(str1) 569 str2 = self.filteraccelerators(self.filtervariables(str2)) 570 str2 = self.filterxml(str2) 571 return helpers.countsmatch(str1, str2, (u'"', u'""', u'\\"', u"«", 572 u"»", u"“", u"”"))
573
574 - def doublespacing(self, str1, str2):
575 """checks for bad double-spaces by comparing to original""" 576 str1 = self.filteraccelerators(str1) 577 str2 = self.filteraccelerators(str2) 578 return helpers.countmatch(str1, str2, u" ")
579
580 - def puncspacing(self, str1, str2):
581 """checks for bad spacing after punctuation""" 582 # Convert all nbsp to space, and just check spaces. Useful intermediate 583 # step to stricter nbsp checking? 584 str1 = self.filteraccelerators(self.filtervariables(str1)) 585 str1 = self.config.lang.punctranslate(str1) 586 str1 = str1.replace(u"\u00a0", u" ") 587 if str1.find(u" ") == -1: 588 return True 589 str2 = self.filteraccelerators(self.filtervariables(str2)) 590 str2 = str2.replace(u"\u00a0", u" ") 591 for puncchar in self.config.punctuation: 592 plaincount1 = str1.count(puncchar) 593 plaincount2 = str2.count(puncchar) 594 if not plaincount1 or plaincount1 != plaincount2: 595 continue 596 spacecount1 = str1.count(puncchar + u" ") 597 spacecount2 = str2.count(puncchar + u" ") 598 if spacecount1 != spacecount2: 599 # handle extra spaces that are because of transposed punctuation 600 if str1.endswith(puncchar) != str2.endswith(puncchar) and abs(spacecount1 - spacecount2) == 1: 601 continue 602 return False 603 return True
604
    def printf(self, str1, str2):
        """checks whether printf format strings match"""
        # The docstring above doubles as the failure message, so it is kept
        # verbatim.
        # Returns 1 when the format specifiers found by printf_pat in str2
        # agree with those in str1, 0 otherwise.
        # count1/count2 end up as the number of specifiers seen in str1/str2,
        # or stay None when the corresponding loop never ran.
        count1 = count2 = plural = None
        # self.hasplural only set by run_filters, not always available
        if 'hasplural' in self.__dict__:
            plural = self.hasplural
        for var_num2, match2 in enumerate(printf_pat.finditer(str2)):
            count2 = var_num2 + 1
            str2key = match2.group('key')
            if match2.group('ord'):
                # Positional specifier (%1$s): compare with the source
                # specifier at that position.
                for var_num1, match1 in enumerate(printf_pat.finditer(str1)):
                    count1 = var_num1 + 1
                    if int(match2.group('ord')) == var_num1 + 1:
                        if match2.group('fullvar') != match1.group('fullvar'):
                            return 0
            elif str2key:
                # Named specifier (%(key)s): the same key must exist in the
                # source with the same format.
                str1key = None
                for var_num1, match1 in enumerate(printf_pat.finditer(str1)):
                    count1 = var_num1 + 1
                    if match1.group('key') and str2key == match1.group('key'):
                        str1key = match1.group('key')
                        # '%.0s' "placeholder" in plural will match anything
                        if plural and match2.group('fullvar') == '.0s':
                            continue
                        if match1.group('fullvar') != match2.group('fullvar'):
                            return 0
                if str1key == None:
                    # The key does not occur in the source at all.
                    return 0
            else:
                # Plain specifier: compare with the source specifier at the
                # same index.
                for var_num1, match1 in enumerate(printf_pat.finditer(str1)):
                    count1 = var_num1 + 1
                    # '%.0s' "placeholder" in plural will match anything
                    if plural and match2.group('fullvar') == '.0s':
                        continue
                    if (var_num1 == var_num2) and (match1.group('fullvar') != match2.group('fullvar')):
                        return 0

        if count2 is None:
            # No specifiers in the translation: fail if the source has any.
            if list(printf_pat.finditer(str1)):
                return 0

        # Finally the number of specifiers must agree.
        if (count1 or count2) and (count1 != count2):
            return 0
        return 1
649
650 - def accelerators(self, str1, str2):
651 """checks whether accelerators are consistent between the two strings""" 652 str1 = self.filtervariables(str1) 653 str2 = self.filtervariables(str2) 654 messages = [] 655 for accelmarker in self.config.accelmarkers: 656 counter1 = decoration.countaccelerators(accelmarker, self.config.sourcelang.validaccel) 657 counter2 = decoration.countaccelerators(accelmarker, self.config.lang.validaccel) 658 count1, countbad1 = counter1(str1) 659 count2, countbad2 = counter2(str2) 660 getaccel = decoration.getaccelerators(accelmarker, self.config.lang.validaccel) 661 accel2, bad2 = getaccel(str2) 662 if count1 == count2: 663 continue 664 if count1 == 1 and count2 == 0: 665 if countbad2 == 1: 666 messages.append(u"accelerator %s appears before an invalid accelerator character '%s' (eg. space)" % (accelmarker, bad2[0])) 667 else: 668 messages.append(u"accelerator %s is missing from translation" % accelmarker) 669 elif count1 == 0: 670 messages.append(u"accelerator %s does not occur in original and should not be in translation" % accelmarker) 671 elif count1 == 1 and count2 > count1: 672 messages.append(u"accelerator %s is repeated in translation" % accelmarker) 673 else: 674 messages.append(u"accelerator %s occurs %d time(s) in original and %d time(s) in translation" % (accelmarker, count1, count2)) 675 if messages: 676 if "accelerators" in self.config.criticaltests: 677 raise SeriousFilterFailure(messages) 678 else: 679 raise FilterFailure(messages) 680 return True
681 682 # def acceleratedvariables(self, str1, str2): 683 # """checks that no variables are accelerated""" 684 # messages = [] 685 # for accelerator in self.config.accelmarkers: 686 # for variablestart, variableend in self.config.varmatches: 687 # error = accelerator + variablestart 688 # if str1.find(error) >= 0: 689 # messages.append(u"original has an accelerated variable") 690 # if str2.find(error) >= 0: 691 # messages.append(u"translation has an accelerated variable") 692 # if messages: 693 # raise FilterFailure(messages) 694 # return True 695
696 - def variables(self, str1, str2):
697 """checks whether variables of various forms are consistent between the 698 two strings""" 699 messages = [] 700 mismatch1, mismatch2 = [], [] 701 varnames1, varnames2 = [], [] 702 for startmarker, endmarker in self.config.varmatches: 703 varchecker = decoration.getvariables(startmarker, endmarker) 704 if startmarker and endmarker: 705 if isinstance(endmarker, int): 706 redecorate = lambda var: startmarker + var 707 else: 708 redecorate = lambda var: startmarker + var + endmarker 709 elif startmarker: 710 redecorate = lambda var: startmarker + var 711 else: 712 redecorate = lambda var: var 713 vars1 = varchecker(str1) 714 vars2 = varchecker(str2) 715 if vars1 != vars2: 716 # we use counts to compare so we can handle multiple variables 717 vars1, vars2 = [var for var in vars1 if vars1.count(var) > vars2.count(var)], [var for var in vars2 if vars1.count(var) < vars2.count(var)] 718 # filter variable names we've already seen, so they aren't 719 # matched by more than one filter... 720 vars1, vars2 = [var for var in vars1 if var not in varnames1], [var for var in vars2 if var not in varnames2] 721 varnames1.extend(vars1) 722 varnames2.extend(vars2) 723 vars1 = map(redecorate, vars1) 724 vars2 = map(redecorate, vars2) 725 mismatch1.extend(vars1) 726 mismatch2.extend(vars2) 727 if mismatch1: 728 messages.append(u"do not translate: %s" % u", ".join(mismatch1)) 729 elif mismatch2: 730 messages.append(u"translation contains variables not in original: %s" % u", ".join(mismatch2)) 731 if messages and mismatch1: 732 raise SeriousFilterFailure(messages) 733 elif messages: 734 raise FilterFailure(messages) 735 return True
736
    def functions(self, str1, str2):
        """checks that function names are not translated"""
        # The docstring above doubles as the failure message, so it is kept
        # verbatim.
        # Delegates to helpers.funcmatch() with decoration.getfunctions as
        # the extractor and the configured punctuation as its extra argument.
        return helpers.funcmatch(str1, str2, decoration.getfunctions, self.config.punctuation)
740
    def emails(self, str1, str2):
        """checks that emails are not translated"""
        # The docstring above doubles as the failure message, so it is kept
        # verbatim.
        # Delegates to helpers.funcmatch() with decoration.getemails as the
        # extractor.
        return helpers.funcmatch(str1, str2, decoration.getemails)
744
    def urls(self, str1, str2):
        """checks that URLs are not translated"""
        # The docstring above doubles as the failure message, so it is kept
        # verbatim.
        # Delegates to helpers.funcmatch() with decoration.geturls as the
        # extractor.
        return helpers.funcmatch(str1, str2, decoration.geturls)
748
    def numbers(self, str1, str2):
        """checks whether numbers of various forms are consistent between the
        two strings"""
        # The docstring above doubles as the failure message, so it is kept
        # verbatim.
        # Only the numbers extracted from the source (str1) are handed to
        # helpers.countsmatch() for comparison.
        return helpers.countsmatch(str1, str2, decoration.getnumbers(str1))
753
    def startwhitespace(self, str1, str2):
        """checks whether whitespace at the beginning of the strings matches"""
        # The docstring above doubles as the failure message, so it is kept
        # verbatim.
        # Delegates to helpers.funcmatch() with decoration.spacestart as the
        # extractor.
        return helpers.funcmatch(str1, str2, decoration.spacestart)
757
758 - def endwhitespace(self, str1, str2):
759 """checks whether whitespace at the end of the strings matches""" 760 str1 = self.config.lang.punctranslate(str1) 761 return helpers.funcmatch(str1, str2, decoration.spaceend)
762
763 - def startpunc(self, str1, str2):
764 """checks whether punctuation at the beginning of the strings match""" 765 str1 = self.filterxml(self.filteraccelerators(self.filtervariables(self.filterwordswithpunctuation(str1)))) 766 str1 = self.config.lang.punctranslate(str1) 767 str2 = self.filterxml(self.filteraccelerators(self.filtervariables(self.filterwordswithpunctuation(str2)))) 768 return helpers.funcmatch(str1, str2, decoration.puncstart, self.config.punctuation)
769
770 - def endpunc(self, str1, str2):
771 """checks whether punctuation at the end of the strings match""" 772 str1 = self.filtervariables(str1) 773 str1 = self.config.lang.punctranslate(str1) 774 str2 = self.filtervariables(str2) 775 str1 = str1.rstrip() 776 str2 = str2.rstrip() 777 return helpers.funcmatch(str1, str2, decoration.puncend, self.config.endpunctuation + u":")
778
779 - def purepunc(self, str1, str2):
780 """checks that strings that are purely punctuation are not changed""" 781 # this test is a subset of startandend 782 if (decoration.ispurepunctuation(str1)): 783 return str1 == str2 784 else: 785 return not decoration.ispurepunctuation(str2)
786
def brackets(self, str1, str2):
    """Checks that the number of brackets in both strings match.

    Variables are filtered out of both strings first. Each of the six
    bracket characters is counted separately; a FilterFailure listing the
    missing and/or extra brackets is raised when any count differs.
    Returns True when all counts agree.
    """
    source = self.filtervariables(str1)
    target = self.filtervariables(str2)
    bracket_chars = (u"[", u"]", u"{", u"}", u"(", u")")
    # Negative delta: the translation has fewer; positive: it has more.
    deltas = [(char, target.count(char) - source.count(char)) for char in bracket_chars]
    missing = [u"'%s'" % char for char, delta in deltas if delta < 0]
    extra = [u"'%s'" % char for char, delta in deltas if delta > 0]
    messages = []
    if missing:
        messages.append(u"translation is missing %s" % u", ".join(missing))
    if extra:
        messages.append(u"translation has extra %s" % u", ".join(extra))
    if messages:
        raise FilterFailure(messages)
    return True
808
def sentencecount(self, str1, str2):
    """Checks that the number of sentences in both strings match.

    Accelerators are filtered from both strings; the source is split into
    sentences by the source language object and the target by the target
    language object. Raises FilterFailure when the counts differ.
    """
    source = self.filteraccelerators(str1)
    target = self.filteraccelerators(str2)
    source_count = len(self.config.sourcelang.sentences(source))
    target_count = len(self.config.lang.sentences(target))
    if source_count == target_count:
        return True
    raise FilterFailure(u"The number of sentences differ: %d versus %d" % (source_count, target_count))
818
def options(self, str1, str2):
    """Checks that options are not translated.

    A word of the (variable-filtered) source counts as an option when it
    starts with "--", is not just "--", and ends in an alphanumeric
    character. The part before the first "=" must appear in the target;
    the part between the first and second "=", if any, must not.
    Raises FilterFailure otherwise.
    """
    for token in self.filtervariables(str1).split():
        looks_like_option = (token != u"--" and token.startswith(u"--")
                             and token[-1].isalnum())
        if not looks_like_option:
            continue
        pieces = token.split(u"=")
        option = pieces[0]
        if option not in str2:
            raise FilterFailure(u"The option %s does not occur or is translated in the translation." % option)
        if len(pieces) > 1 and pieces[1] in str2:
            raise FilterFailure(u"The parameter %(param)s in option %(option)s is not translated." % {"param": pieces[1], "option": option})
    return True
830
def startcaps(self, str1, str2):
    """Checks that the message starts with the correct capitalisation.

    Accelerators are filtered from both strings. When both have more than
    one character, the capsstart results of the source and target language
    objects are compared. Otherwise the check passes unless exactly one of
    the two strings is empty.
    """
    source = self.filteraccelerators(str1)
    target = self.filteraccelerators(str2)
    source_len, target_len = len(source), len(target)
    if source_len > 1 and target_len > 1:
        return self.config.sourcelang.capsstart(source) == self.config.lang.capsstart(target)
    # Short strings: fail only when one side is empty and the other is not.
    return (source_len == 0) == (target_len == 0)
842
def simplecaps(self, str1, str2):
    """Checks the capitalisation of two strings isn't wildly different.

    Variables are removed from both strings and capital / alphabetic
    characters are counted with helpers.filtercount. An all-caps source
    requires an all-caps target; otherwise a series of heuristics on the
    counts and string lengths decides the result.
    """
    source = self.removevariables(str1)
    target = self.removevariables(str2)
    # TODO: review this. The 'I' is specific to English, so it probably
    # serves no purpose to get sourcelang.sentenceend
    lone_i_pattern = u"[^%s]( I )" % self.config.sourcelang.sentenceend
    source = re.sub(lone_i_pattern, u" i ", source)
    source_caps = helpers.filtercount(source, unicode.isupper)
    target_caps = helpers.filtercount(target, unicode.isupper)
    source_alpha = helpers.filtercount(source, unicode.isalpha)
    target_alpha = helpers.filtercount(target, unicode.isalpha)
    source_len, target_len = len(source), len(target)
    # Capture the all caps case
    if source_caps == source_alpha:
        return target_caps == target_alpha
    # some heuristic tests to try and see that the style of capitals is
    # vaguely the same
    if source_caps in (0, 1):
        return target_caps == source_caps
    if source_caps < source_len / 10:
        return target_caps <= target_len / 8
    if source_len < 10:
        return abs(source_caps - target_caps) < 3
    if source_caps > source_len * 6 / 10:
        return target_caps > target_len * 6 / 10
    return abs(source_caps - target_caps) < (source_len + target_len) / 6
869
def acronyms(self, str1, str2):
    """Checks that acronyms that appear are unchanged.

    An acronym is an all-uppercase word of more than one character from
    the filtered source string. Variables found in the source and the keys
    of config.musttranslatewords are exempt. Raises FilterFailure naming
    every acronym that does not occur in the filtered target.
    """
    permitted = []
    for startmatch, endmatch in self.config.varmatches:
        permitted.extend(decoration.getvariables(startmatch, endmatch)(str1))
    permitted.extend(self.config.musttranslatewords.keys())
    source = self.filteraccelerators(self.filtervariables(str1))
    source_words = self.config.lang.word_iter(source)
    target = self.filteraccelerators(self.filtervariables(str2))
    #TODO: strip XML? - should provide better error messsages
    # see mail/chrome/messanger/smime.properties.po
    #TODO: consider limiting the word length for recognising acronyms to
    #something like 5/6 characters
    changed = [word for word in source_words
               if word.isupper() and len(word) > 1
               and word not in permitted and word not in target]
    if changed:
        raise FilterFailure(u"acronyms should not be translated: " + u", ".join(changed))
    return True
891
def doublewords(self, str1, str2):
    """Checks for repeated words in the translation.

    Only the target string is examined: variables and accelerators are
    removed, full stops dropped, and the text lower-cased and split on
    whitespace. Raises FilterFailure for the first word that directly
    follows itself and is not in the language's validdoublewords.
    """
    # NOTE(review): splitting and re-joining on "\n" yields an equal
    # string, so this step does not itself remove newlines; the later
    # whitespace split() is what separates words across lines.
    rejoined = "\n".join(str2.split("\n"))
    cleaned = self.filteraccelerators(self.removevariables(rejoined))
    words = cleaned.replace(u".", u"").lower().split()
    for previous, current in zip(words, words[1:]):
        if current == previous and current not in self.config.lang.validdoublewords:
            raise FilterFailure(u"The word '%s' is repeated" % current)
    return True
902
def notranslatewords(self, str1, str2):
    """Checks that words configured as untranslatable appear in the
    translation too.

    Passes immediately when config.notranslatewords is empty. Otherwise
    variables are filtered, every configured punctuation character is
    replaced by a space, accelerators are filtered, and FilterFailure is
    raised for source words that are configured as untranslatable but are
    absent from the target words.
    """
    protected = self.config.notranslatewords
    if not protected:
        return True
    source = self.filtervariables(str1)
    target = self.filtervariables(str2)
    #The above is full of strange quotes and things in utf-8 encoding.
    #single apostrophe perhaps problematic in words like "doesn't"
    for mark in self.config.punctuation:
        source = source.replace(mark, u" ")
        target = target.replace(mark, u" ")
    source_words = self.filteraccelerators(source).split()
    target_words = self.filteraccelerators(target).split()
    stopwords = []
    for word in source_words:
        if word in protected and word not in target_words:
            stopwords.append(word)
    if stopwords:
        raise FilterFailure(u"do not translate: %s" % (u", ".join(stopwords)))
    return True
921
def musttranslatewords(self, str1, str2):
    """Checks that words configured as definitely translatable don't appear
    in the translation.

    Passes immediately when config.musttranslatewords is empty. Otherwise
    variables are removed, every configured punctuation character is
    replaced by a space, accelerators are filtered, and FilterFailure is
    raised for source words that are configured as must-translate and
    still occur among the target words.
    """
    required = self.config.musttranslatewords
    if not required:
        return True
    source = self.removevariables(str1)
    target = self.removevariables(str2)
    # The above is full of strange quotes and things in utf-8 encoding.
    # single apostrophe perhaps problematic in words like "doesn't"
    for mark in self.config.punctuation:
        source = source.replace(mark, u" ")
        target = target.replace(mark, u" ")
    source_words = self.filteraccelerators(source).split()
    target_words = self.filteraccelerators(target).split()
    stopwords = []
    for word in source_words:
        if word in required and word in target_words:
            stopwords.append(word)
    if stopwords:
        raise FilterFailure(u"please translate: %s" % (u", ".join(stopwords)))
    return True
940
def validchars(self, str1, str2):
    """Checks that only characters specified as valid appear in the
    translation.

    Passes immediately when config.validcharsmap is empty. Both strings
    are translated with that map; characters left over in the target that
    are not also left over in the source are reported via FilterFailure.
    """
    charmap = self.config.validcharsmap
    if not charmap:
        return True
    source_leftover = str1.translate(charmap)
    target_leftover = str2.translate(charmap)
    offenders = []
    for char in target_leftover:
        # Characters that also survive in the source are tolerated.
        if char not in source_leftover:
            offenders.append(u"'%s' (\\u%04x)" % (char, ord(char)))
    if offenders:
        raise FilterFailure(u"invalid chars: %s" % (u", ".join(offenders)))
    return True
952
def filepaths(self, str1, str2):
    """Checks that file paths have not been translated.

    Every whitespace-separated word of the accelerator-filtered source
    that starts with "/" is checked with helpers.countsmatch against the
    unfiltered strings; the first mismatch makes the check fail.
    """
    for token in self.filteraccelerators(str1).split():
        if token.startswith(u"/") and not helpers.countsmatch(str1, str2, (token,)):
            return False
    return True
960
def xmltags(self, str1, str2):
    """Checks that XML/HTML tags have not been translated.

    Without tags in the source, the check only fails if the target
    contains tags. A source consisting solely of one tag with no "=" in
    it always passes. Otherwise the tag properties of both strings
    (ignoring config.ignoretags, and passed through intuplelist with
    config.canchangetags) must be equal.
    """
    source_tags = tag_re.findall(str1)
    if not source_tags:
        # No tags in str1, let's just check that none were added in str2.
        # This might be useful for fuzzy strings wrongly unfuzzied.
        return not tag_re.findall(str2)
    first_tag = source_tags[0]
    if len(first_tag) == len(str1) and u"=" not in first_tag:
        return True
    target_tags = tag_re.findall(str2)
    source_props = tagproperties(source_tags, self.config.ignoretags)
    target_props = tagproperties(target_tags, self.config.ignoretags)
    source_filtered = [intuplelist(prop, self.config.canchangetags) for prop in source_props]
    target_filtered = [intuplelist(prop, self.config.canchangetags) for prop in target_props]
    # TODO: consider the consequences of different ordering of
    # attributes/tags
    return source_filtered == target_filtered
988
def kdecomments(self, str1, str2):
    """Checks to ensure that no KDE style comments appear in the
    translation.

    Fails when the target starts with "_:" or contains "_:" directly
    after a newline.
    """
    starts_with_comment = str2.startswith(u"_:")
    has_inner_comment = u"\n_:" in str2
    return not (has_inner_comment or starts_with_comment)
993
def compendiumconflicts(self, str1, str2):
    """Checks for Gettext compendium conflicts (#-#-#-#-#).

    Fails when the conflict marker occurs anywhere in the target string.
    """
    conflict_marker = u"#-#-#-#-#"
    return conflict_marker not in str2
997
def simpleplurals(self, str1, str2):
    """Checks for English style plural(s) for you to review.

    Counts occurrences of the literal text "(s)" in the source and in the
    target. When the target language has a single plural form
    (config.lang.nplurals == 1) the target must contain none; otherwise
    the two counts must be equal.
    """
    # Raw strings: "\(" is not a valid string escape sequence, so the
    # non-raw spelling relied on the backslash being kept by accident and
    # produces warnings on newer Python versions. The value is unchanged.
    sourcepatterns = [r"\(s\)"]
    targetpatterns = [r"\(s\)"]

    def numberofpatterns(string, patterns):
        """Returns the total number of matches of all patterns in string."""
        return sum([len(re.findall(pattern, string)) for pattern in patterns])

    sourcecount = numberofpatterns(str1, sourcepatterns)
    targetcount = numberofpatterns(str2, targetpatterns)
    if self.config.lang.nplurals == 1:
        return not targetcount
    return sourcecount == targetcount
1014
def spellcheck(self, str1, str2):
    """Checks words that don't pass a spell check.

    Passes immediately when no target language is configured or the
    spelling module reports itself unavailable. Variables and accelerators
    are filtered from both strings; the source is checked with lang="en"
    and every word flagged there is ignored in the target. Target words in
    config.notranslatewords, or that appear among their own suggestions,
    are ignored too. Raises FilterFailure with one message per remaining
    word.
    """
    if not self.config.targetlanguage or not spelling.available:
        return True
    # TODO: filterxml?
    source = self.filteraccelerators_by_list(self.filtervariables(str1), self.config.sourcelang.validaccel)
    target = self.filteraccelerators_by_list(self.filtervariables(str2), self.config.lang.validaccel)
    source_flagged = [word for word, index, suggestions in spelling.check(source, lang="en")]
    messages = []
    for word, index, suggestions in spelling.check(target, lang=self.config.targetlanguage):
        skip = (word in self.config.notranslatewords
                or word in source_flagged
                # hack to ignore hyphenisation rules
                or word in suggestions)
        if skip:
            continue
        messages.append(u"check spelling of %s (could be %s)" % (word, u" / ".join(suggestions[:5])))
    if messages:
        raise FilterFailure(messages)
    return True
1040
def credits(self, str1, str2):
    """Checks for messages containing translation credits instead of normal
    translations.

    Fails when the source string is one of config.credit_sources.
    """
    credit_sources = self.config.credit_sources
    return str1 not in credit_sources
# If the precondition filter is run and fails then the other tests listed are ignored
# Maps the name of a test (key) to the names of the tests that are skipped
# when that test fails (value).
# NOTE(review): this follows the checker methods above, so it is presumably a
# class attribute of the enclosing checker class - confirm the indentation
# when splicing into the file.
preconditions = {"untranslated": ("simplecaps", "variables", "startcaps",
                                  "accelerators", "brackets", "endpunc",
                                  "acronyms", "xmltags", "startpunc",
                                  "endwhitespace", "startwhitespace",
                                  "escapes", "doublequoting", "singlequoting",
                                  "filepaths", "purepunc", "doublespacing",
                                  "sentencecount", "numbers", "isfuzzy",
                                  "isreview", "notranslatewords", "musttranslatewords",
                                  "emails", "simpleplurals", "urls", "printf",
                                  "tabs", "newlines", "functions", "options",
                                  "blank", "nplurals", "gconf"),
                 "blank": ("simplecaps", "variables", "startcaps",
                           "accelerators", "brackets", "endpunc",
                           "acronyms", "xmltags", "startpunc",
                           "endwhitespace", "startwhitespace",
                           "escapes", "doublequoting", "singlequoting",
                           "filepaths", "purepunc", "doublespacing",
                           "sentencecount", "numbers", "isfuzzy",
                           "isreview", "notranslatewords", "musttranslatewords",
                           "emails", "simpleplurals", "urls", "printf",
                           "tabs", "newlines", "functions", "options",
                           "gconf"),
                 "credits": ("simplecaps", "variables", "startcaps",
                             "accelerators", "brackets", "endpunc",
                             "acronyms", "xmltags", "startpunc",
                             "escapes", "doublequoting", "singlequoting",
                             "filepaths", "doublespacing",
                             "sentencecount", "numbers",
                             "emails", "simpleplurals", "urls", "printf",
                             "tabs", "newlines", "functions", "options"),
                 "purepunc": ("startcaps", "options"),
                 # This is causing some problems since Python 2.6, as
                 # startcaps is now seen as an important one to always execute
                 # and could now be done before it is blocked by a failing
                 # "untranslated" or "blank" test. This is probably happening
                 # due to slightly different implementation of the internal
                 # dict handling since Python 2.6. We should never have relied
                 # on this ordering anyway.
                 #"startcaps": ("simplecaps",),
                 "endwhitespace": ("endpunc",),
                 "startwhitespace": ("startpunc",),
                 "unchanged": ("doublewords",),
                 "compendiumconflicts": ("accelerators", "brackets", "escapes",
                                         "numbers", "startpunc", "long", "variables",
                                         "startcaps", "sentencecount", "simplecaps",
                                         "doublespacing", "endpunc", "xmltags",
                                         "startwhitespace", "endwhitespace",
                                         "singlequoting", "doublequoting",
                                         "filepaths", "purepunc", "doublewords", "printf")}

# code to actually run the tests (use unittest?)

# Configuration that OpenOfficeChecker.__init__ merges into its checker
# config (via checkerconfig.update).
openofficeconfig = CheckerConfig(
    accelmarkers=["~"],
    varmatches=[("&", ";"), ("%", "%"), ("%", None), ("%", 0), ("$(", ")"),
                ("$", "$"), ("${", "}"), ("#", "#"), ("#", 1), ("#", 0),
                ("($", ")"), ("$[", "]"), ("[", "]"), ("$", None)],
    ignoretags=[("alt", "xml-lang", None), ("ahelp", "visibility", "visible"),
                ("img", "width", None), ("img", "height", None)],
    canchangetags=[("link", "name", None)],
    )
class OpenOfficeChecker(StandardChecker):
    """StandardChecker whose configuration is updated with openofficeconfig."""

    def __init__(self, **kwargs):
        """Merges openofficeconfig into the checker configuration.

        When no "checkerconfig" keyword is given (or it is None) a fresh
        CheckerConfig is created and stored in kwargs; a supplied config
        object is updated in place. All keywords are then forwarded to
        StandardChecker.__init__.
        """
        if kwargs.get("checkerconfig") is None:
            kwargs["checkerconfig"] = CheckerConfig()
        kwargs["checkerconfig"].update(openofficeconfig)
        StandardChecker.__init__(self, **kwargs)
# Configuration that MozillaChecker.__init__ merges into its checker config
# (via checkerconfig.update).
mozillaconfig = CheckerConfig(
    accelmarkers=["&"],
    varmatches=[("&", ";"), ("%", "%"), ("%", 1), ("$", "$"), ("$", None),
                ("#", 1), ("${", "}"), ("$(^", ")")],
    criticaltests=["accelerators"],
    )
class MozillaChecker(StandardChecker):
    """StandardChecker whose configuration is updated with mozillaconfig."""

    def __init__(self, **kwargs):
        """Merges mozillaconfig into the checker configuration.

        When no "checkerconfig" keyword is given (or it is None) a fresh
        CheckerConfig is created and stored in kwargs; a supplied config
        object is updated in place. All keywords are then forwarded to
        StandardChecker.__init__.
        """
        if kwargs.get("checkerconfig") is None:
            kwargs["checkerconfig"] = CheckerConfig()
        kwargs["checkerconfig"].update(mozillaconfig)
        StandardChecker.__init__(self, **kwargs)

    def credits(self, str1, str2):
        """Checks for messages containing translation credits instead of normal
        translations.

        Fails when any entry of self.locations equals one of the two known
        credit location names; the strings themselves are not inspected.
        """
        credit_locations = ['MOZ_LANGPACK_CONTRIBUTORS', 'credit.translation']
        return not any(location in credit_locations for location in self.locations)
# Configuration that DrupalChecker.__init__ merges into its checker config
# (via checkerconfig.update).
drupalconfig = CheckerConfig(
    varmatches=[("%", None), ("@", None), ("!", None)],
    )
class DrupalChecker(StandardChecker):
    """StandardChecker whose configuration is updated with drupalconfig."""

    def __init__(self, **kwargs):
        """Merges drupalconfig into the checker configuration.

        When no "checkerconfig" keyword is given (or it is None) a fresh
        CheckerConfig is created and stored in kwargs; a supplied config
        object is updated in place. All keywords are then forwarded to
        StandardChecker.__init__.
        """
        if kwargs.get("checkerconfig") is None:
            kwargs["checkerconfig"] = CheckerConfig()
        kwargs["checkerconfig"].update(drupalconfig)
        StandardChecker.__init__(self, **kwargs)
# Configuration that GnomeChecker.__init__ merges into its checker config
# (via checkerconfig.update).
gnomeconfig = CheckerConfig(
    accelmarkers=["_"],
    varmatches=[("%", 1), ("$(", ")")],
    credit_sources=[u"translator-credits"],
    )
class GnomeChecker(StandardChecker):
    """StandardChecker whose configuration is updated with gnomeconfig."""

    def __init__(self, **kwargs):
        """Merges gnomeconfig into the checker configuration.

        When no "checkerconfig" keyword is given (or it is None) a fresh
        CheckerConfig is created and stored in kwargs; a supplied config
        object is updated in place. All keywords are then forwarded to
        StandardChecker.__init__.
        """
        if kwargs.get("checkerconfig") is None:
            kwargs["checkerconfig"] = CheckerConfig()
        kwargs["checkerconfig"].update(gnomeconfig)
        StandardChecker.__init__(self, **kwargs)

    def gconf(self, str1, str2):
        """Checks if we have any gconf config settings translated.

        Only runs for locations containing 'schemas.in'. Each match of
        gconf_attribute_re in the source, with its first and last character
        removed, must occur in the target; otherwise FilterFailure is
        raised listing the offending matches.
        """
        for location in self.locations:
            if 'schemas.in' not in location:
                continue
            translated = []
            for attribute in gconf_attribute_re.findall(str1):
                if attribute[1:-1] not in str2:
                    translated.append(attribute)
            if translated:
                raise FilterFailure(u"do not translate gconf attribute: %s" % (u", ".join(translated)))
        return True
# Configuration that KdeChecker.__init__ merges into its checker config
# (via checkerconfig.update).
kdeconfig = CheckerConfig(
    accelmarkers=["&"],
    varmatches=[("%", 1)],
    credit_sources=[u"Your names", u"Your emails", u"ROLES_OF_TRANSLATORS"],
    )
class KdeChecker(StandardChecker):
    """StandardChecker whose configuration is updated with kdeconfig."""

    def __init__(self, **kwargs):
        """Merges kdeconfig into the checker configuration.

        When no "checkerconfig" keyword is given (or it is None) a fresh
        CheckerConfig is created and stored in kwargs; a supplied config
        object is updated in place. All keywords are then forwarded to
        StandardChecker.__init__.
        """
        # TODO allow setup of KDE plural and translator comments so that they do
        # not create false positives
        if kwargs.get("checkerconfig") is None:
            kwargs["checkerconfig"] = CheckerConfig()
        kwargs["checkerconfig"].update(kdeconfig)
        StandardChecker.__init__(self, **kwargs)
# Configuration that CCLicenseChecker.__init__ merges into its checker config
# (via checkerconfig.update).
cclicenseconfig = CheckerConfig(varmatches=[("@", "@")])
class CCLicenseChecker(StandardChecker):
    """StandardChecker whose configuration is updated with cclicenseconfig."""

    def __init__(self, **kwargs):
        """Merges cclicenseconfig into the checker configuration.

        When no "checkerconfig" keyword is given (or it is None) a fresh
        CheckerConfig is created and stored in kwargs; a supplied config
        object is updated in place. All keywords are then forwarded to
        StandardChecker.__init__.
        """
        if kwargs.get("checkerconfig") is None:
            kwargs["checkerconfig"] = CheckerConfig()
        kwargs["checkerconfig"].update(cclicenseconfig)
        StandardChecker.__init__(self, **kwargs)
# Maps a project style name to the checker class used for that project.
# Note that "wx" shares KdeChecker with "kde".
projectcheckers = {
    "openoffice": OpenOfficeChecker,
    "mozilla": MozillaChecker,
    "kde": KdeChecker,
    "wx": KdeChecker,
    "gnome": GnomeChecker,
    "creativecommons": CCLicenseChecker,
    "drupal": DrupalChecker,
    }
class StandardUnitChecker(UnitChecker):
    """The standard checks for common checks on translation units."""

    def isfuzzy(self, unit):
        """Check if the unit has been marked fuzzy.

        Passes (returns True) when the unit is not fuzzy.
        """
        marked_fuzzy = unit.isfuzzy()
        return not marked_fuzzy

    def isreview(self, unit):
        """Check if the unit has been marked review.

        Passes (returns True) when the unit is not marked for review.
        """
        marked_review = unit.isreview()
        return not marked_review

    def nplurals(self, unit):
        """Checks for the correct number of noun forms for plural
        translations.

        Units without plurals always pass, as do all units when the target
        language's nplurals value is not greater than zero.
        """
        if not unit.hasplural():
            return True
        # if we don't have a valid nplurals value, don't run the test
        expected = self.config.lang.nplurals
        if expected > 0:
            return len(unit.target.strings) == expected
        return True

    def hassuggestion(self, unit):
        """Checks if there is at least one suggested translation for this
        unit.

        Suggestions come from self.suggestion_store when it is set (the
        attribute is created as None if missing); otherwise, for XLIFF
        units, from the unit's alternative translations. Passes when no
        suggestion is found.
        """
        store = getattr(self, 'suggestion_store', None)
        self.suggestion_store = store
        if store:
            suggestions = store.findunits(unit.source)
        elif xliff and isinstance(unit, xliff.xliffunit):
            # TODO: we probably want to filter them somehow
            suggestions = unit.getalttrans()
        else:
            suggestions = []
        return not suggestions
1258 1259
def runtests(str1, str2, ignorelist=()):
    """Verifies that the tests pass for a pair of strings.

    Both strings are normalised, wrapped in a TranslationUnit and run
    through a StandardChecker that excludes the filters in ignorelist.
    Every failure is printed, and the failures object returned by
    run_filters is handed back to the caller.
    """
    from translate.storage import base
    source = data.normalized_unicode(str1)
    target = data.normalized_unicode(str2)
    unit = base.TranslationUnit(source)
    unit.target = target
    failures = StandardChecker(excludefilters=ignorelist).run_filters(unit)
    for test in failures:
        print("failure: %s: %s\n %r\n %r" % (test, failures[test], source, target))
    return failures
1272 1273
def batchruntests(pairs):
    """Runs the tests on a batch of string pairs and prints a summary.

    pairs is a sequence of (source, target) string pairs. Each pair is
    handed to runtests; after all pairs have been processed a blank line
    and a "total: passed/count" line are printed.
    """
    numpairs = len(pairs)
    passed = 0
    for str1, str2 in pairs:
        # runtests returns the failures it found, so a pair has passed
        # only when that result is empty. (The previous truthiness test
        # counted the failing pairs instead.)
        if not runtests(str1, str2):
            passed += 1
    # Single-argument parenthesised print gives the same output whether
    # print is a statement or a function.
    print("")
    print("total: %d/%d pairs passed" % (passed, numpairs))
# When run as a script, push a small built-in set of (source, target) sample
# pairs through batchruntests.
if __name__ == '__main__':
    testset = [(r"simple", r"somple"),
               (r"\this equals \that", r"does \this equal \that?"),
               (r"this \'equals\' that", r"this 'equals' that"),
               (r" start and end! they must match.", r"start and end! they must match."),
               (r"check for matching %variables marked like %this", r"%this %variable is marked"),
               (r"check for mismatching %variables marked like %this", r"%that %variable is marked"),
               (r"check for mismatching %variables% too", r"how many %variable% are marked"),
               (r"%% %%", r"%%"),
               (r"Row: %1, Column: %2", r"Mothalo: %1, Kholomo: %2"),
               (r"simple lowercase", r"it is all lowercase"),
               (r"simple lowercase", r"It Is All Lowercase"),
               (r"Simple First Letter Capitals", r"First Letters"),
               (r"SIMPLE CAPITALS", r"First Letters"),
               (r"SIMPLE CAPITALS", r"ALL CAPITALS"),
               (r"forgot to translate", r" "),
               ]
    batchruntests(testset)