Package translate :: Package storage :: Module fpo
[hide private]
[frames | no frames]

Source Code for Module translate.storage.fpo

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2002-2009 Zuza Software Foundation 
  5  # 
  6  # This file is part of the Translate Toolkit. 
  7  # 
  8  # This program is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # This program is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program; if not, see <http://www.gnu.org/licenses/>. 
 20   
 21  """Classes for the support of Gettext .po and .pot files. 
 22   
 23  This implementation assumes that cpo is working. This should not be used 
 24  directly, but can be used once cpo has been established to work.""" 
 25   
 26  #TODO: 
 27  # - handle headerless PO files better 
 28  # - previous msgid and msgctxt 
 29  # - accept only unicodes everywhere 
 30   
 31  import re 
 32  import copy 
 33  import cStringIO 
 34  import urllib 
 35   
 36  from translate.lang import data 
 37  from translate.misc.multistring import multistring 
 38  from translate.storage import pocommon, base, cpo, poparser 
 39  from translate.storage.pocommon import encodingToUse 
 40   
 41  lsep = " " 
 42  """Seperator for #: entries""" 
 43   
 44  basic_header = r'''msgid "" 
 45  msgstr "" 
 46  "Content-Type: text/plain; charset=UTF-8\n" 
 47  "Content-Transfer-Encoding: 8bit\n" 
 48  ''' 
 49   
 50   
class pounit(pocommon.pounit):
    """A PO unit whose fields are stored as plain Python values.

    Comments are kept as lists of lines, source and target as unicode or
    multistring objects.  Conversion to PO text is delegated to the cpo
    implementation (see L{__str__}).
    """
    # Fields of a PO entry and the PO syntax each one corresponds to:
    # othercomments = []      #   # this is another comment
    # automaticcomments = []  #   #. comment extracted from the source code
    # sourcecomments = []     #   #: sourcefile.xxx:35
    # prev_msgctxt = []       #   #| The previous values that msgctxt and msgid held
    # prev_msgid = []         #
    # prev_msgid_plural = []  #
    # typecomments = []       #   #, fuzzy
    # msgidcomment = u""      #   _: within msgid
    # msgctxt
    # msgid = []
    # msgstr = []

    # Our homegrown way to indicate what must be copied in a shallow
    # fashion
    __shallow__ = ['_store']

    # Values of ``origin`` that select the automatic (extracted) comments.
    _automatic_origins = ("programmer", "developer", "source code")

    def __init__(self, source=None, encoding="UTF-8"):
        """Create a unit with the given source, no comments and an empty target.

        @param source: The msgid; passed on to the parent constructor.
        @param encoding: Encoding name, normalised through encodingToUse().
        """
        pocommon.pounit.__init__(self, source)
        self._encoding = encodingToUse(encoding)
        self._initallcomments(blankall=True)
        self._msgctxt = u""

        self.target = u""

    def _initallcomments(self, blankall=False):
        """Initialises allcomments

        Nothing is touched unless C{blankall} is true, in which case every
        comment list and the msgid comment are reset to empty values.
        """
        if blankall:
            self.othercomments = []
            self.automaticcomments = []
            self.sourcecomments = []
            self.typecomments = []
            self.msgidcomment = u""

    def getsource(self):
        """Return the stored msgid."""
        return self._source

    def setsource(self, source):
        """Store the msgid, coercing it to unicode or multistring.

        Any cached rich source is discarded.
        """
        self._rich_source = None
        source = data.forceunicode(source or u"")
        source = source or u""
        if isinstance(source, (multistring, unicode)):
            self._source = source
        else:
            # anything else (list, dict, ...) is wrapped in a multistring
            self._source = multistring(source)
    source = property(getsource, setsource)

    def gettarget(self):
        """Returns the unescaped msgstr"""
        return self._target

    def settarget(self, target):
        """Sets the msgstr to the given (unescaped) value

        For a plural unit the value is always stored as a multistring.  For a
        singular unit a one-element list/dict is unwrapped via C{target[0]}.

        @raise ValueError: if the unit has no plural but C{target} is a list
            or dict whose length is not exactly one.
        """
        self._rich_target = None
        if self.hasplural():
            if isinstance(target, multistring):
                self._target = target
            else:
                # unicode, list, dict
                self._target = multistring(target)
        elif isinstance(target, (dict, list)):
            if len(target) == 1:
                self._target = target[0]
            else:
                raise ValueError("po msgid element has no plural but msgstr has %d elements (%s)" % (len(target), target))
        else:
            self._target = target
    target = property(gettarget, settarget)

    def getnotes(self, origin=None):
        """Return comments based on origin value (programmer, developer, source code and translator)

        @param origin: C{None} for translator comments followed by automatic
            comments, C{"translator"} for translator comments only, or one of
            C{"programmer"}, C{"developer"}, C{"source code"} for automatic
            comments only.
        @return: The selected comment lines joined with newlines.
        @raise ValueError: for any other C{origin}.
        """
        if origin is None:
            # Join the two groups with a newline as well; plain concatenation
            # would fuse the last translator line with the first automatic
            # line into a single line.
            groups = [self.othercomments, self.automaticcomments]
            comments = u"\n".join([u"\n".join(group) for group in groups if group])
        elif origin == "translator":
            comments = u"\n".join(self.othercomments)
        elif origin in self._automatic_origins:
            comments = u"\n".join(self.automaticcomments)
        else:
            raise ValueError("Comment type not valid")
        return comments

    def addnote(self, text, origin=None, position="append"):
        """This is modeled on the XLIFF method. See xliff.py::xliffunit.addnote

        @param text: Note text; split into one comment per line.  Empty or
            whitespace-only text is ignored.
        @param origin: One of C{"programmer"}, C{"developer"} or
            C{"source code"} targets the automatic comments; anything else
            targets the translator comments.
        @param position: C{"append"} or C{"prepend"}.  Any other value makes
            the new lines replace the existing comments of that kind.
        """
        # ignore empty strings and strings without non-space characters
        if not (text and text.strip()):
            return
        text = data.forceunicode(text)
        autocomments = origin in self._automatic_origins
        if autocomments:
            commentlist = self.automaticcomments
        else:
            commentlist = self.othercomments
        # a single trailing newline must not produce an empty last comment
        if text.endswith(u'\n'):
            text = text[:-1]
        newcomments = text.split(u"\n")
        if position == "append":
            newcomments = commentlist + newcomments
        elif position == "prepend":
            newcomments = newcomments + commentlist

        if autocomments:
            self.automaticcomments = newcomments
        else:
            self.othercomments = newcomments

    def removenotes(self):
        """Remove all the translator's notes (other comments)"""
        self.othercomments = []

    def __deepcopy__(self, memo=None):
        """Return a copy in which everything except C{__shallow__} is deep-copied.

        Attributes named in C{__shallow__} are shared with the original.

        @param memo: The memo dictionary of the running deepcopy operation;
            a fresh one is created when the method is called directly.
        """
        if memo is None:
            memo = {}
        # Make an instance to serve as the copy
        new_unit = self.__class__()
        # Register the copy before descending, so that anything referring
        # back to this unit resolves to the copy instead of recursing.
        memo[id(self)] = new_unit
        # We'll be testing membership frequently, so make a set from
        # self.__shallow__
        shallow = set(self.__shallow__)
        # Make deep copies of all members which are not in shallow
        for key, value in self.__dict__.iteritems():
            if key not in shallow:
                setattr(new_unit, key, copy.deepcopy(value, memo))
        # Make shallow copies of all members which are in shallow
        for key in shallow:
            setattr(new_unit, key, getattr(self, key))
        return new_unit

    def copy(self):
        """Return a deep copy of this unit (see L{__deepcopy__})."""
        return copy.deepcopy(self)

    def _msgidlen(self):
        """Return the length of the source; all plural forms are counted."""
        if self.hasplural():
            return len("".join(self.source.strings))
        else:
            return len(self.source)

    def _msgstrlen(self):
        """Return the length of the target; all plural forms are counted."""
        if self.hasplural():
            return len("".join(self.target.strings))
        else:
            return len(self.target)

    def merge(self, otherpo, overwrite=False, comments=True, authoritative=False):
        """Merges the otherpo (with the same msgid) into this one.

        Overwrite non-blank self.msgstr only if overwrite is True
        merge comments only if comments is True

        If C{otherpo} is not an instance of this class the merge is handed to
        the parent class.  Unless C{authoritative} is true, automatic comments
        and locations are merged as well.  Note that a KDE-style comment found
        in C{otherpo.target} is stripped from C{otherpo} itself.
        """

        def mergelists(list1, list2, split=False):
            """Add to list1 (in place) the items of list2 it does not have."""
            # decode where necessary: as soon as either list holds unicode,
            # byte strings in both lists are decoded in place
            if unicode in [type(item) for item in list2] + [type(item) for item in list1]:
                for position, item in enumerate(list1):
                    if isinstance(item, str):
                        list1[position] = item.decode("utf-8")
                for position, item in enumerate(list2):
                    if isinstance(item, str):
                        list2[position] = item.decode("utf-8")

            # Determine the newline style of list2 from its first item.  The
            # last matching candidate wins, so "\r\n" is recognised as a whole
            # and no stray "\r" is left behind when stripping below.
            lineend = ""
            if list2 and list2[0]:
                for candidate in ["\n", "\r", "\n\r", "\r\n"]:
                    if list2[0].endswith(candidate):
                        lineend = candidate

            if split:
                # Split if directed to do so: compare whitespace separated
                # tokens rather than whole items
                splitlist1 = []
                splitlist2 = []
                for item in list1:
                    splitlist1.extend(item.split())
                for item in list2:
                    splitlist2.extend(item.split())
                list1.extend([item for item in splitlist2 if item not in splitlist1])
            elif list1 != list2:
                # Normal merge; the detected line ending is stripped from
                # each incoming item
                for item in list2:
                    item = item.rstrip(lineend)
                    # avoid duplicate comment lines (this might cause some problems)
                    if item not in list1 or len(item) < 5:
                        list1.append(item)

        if not isinstance(otherpo, pounit):
            super(pounit, self).merge(otherpo, overwrite, comments)
            return
        if comments:
            mergelists(self.othercomments, otherpo.othercomments)
            mergelists(self.typecomments, otherpo.typecomments)
            if not authoritative:
                # We don't bring across otherpo.automaticcomments as we consider ourself
                # to be the the authority.  Same applies to otherpo.msgidcomments
                mergelists(self.automaticcomments, otherpo.automaticcomments)
                mergelists(self.sourcecomments, otherpo.sourcecomments, split=True)
        if not self.istranslated() or overwrite:
            # Remove kde-style comments from the translation (if any). XXX - remove
            if pocommon.extract_msgid_comment(otherpo.target):
                otherpo.target = otherpo.target.replace('_: ' + otherpo._extract_msgidcomments() + '\n', '')
            self.target = otherpo.target
            if self.source != otherpo.source or self.getcontext() != otherpo.getcontext():
                self.markfuzzy()
            else:
                self.markfuzzy(otherpo.isfuzzy())
        elif not otherpo.istranslated():
            if self.source != otherpo.source:
                self.markfuzzy()
        else:
            if self.target != otherpo.target:
                self.markfuzzy()

    def isheader(self):
        """Return whether this unit has an empty id and a non-empty target."""
        #TODO: fix up nicely
        return not self.getid() and len(self.target) > 0

    def isblank(self):
        """Return whether source, target and msgctxt are all empty.

        Headers and units carrying a msgid comment are never blank.
        """
        if self.isheader() or self.msgidcomment:
            return False
        return (self._msgidlen() == 0 and self._msgstrlen() == 0
                and len(self._msgctxt) == 0)

    def hastypecomment(self, typecomment):
        """Check whether the given type comment is present"""
        # check for word boundaries properly by using a regular expression;
        # the flag is escaped so that it is always matched literally
        pattern = re.compile("\\b%s\\b" % re.escape(typecomment))
        for tcline in self.typecomments:
            if pattern.search(tcline):
                return True
        return False

    def hasmarkedcomment(self, commentmarker):
        """Check whether the given comment marker is present as # (commentmarker) ..."""
        # raise DeprecationWarning
        commentmarker = "(%s)" % commentmarker
        for comment in self.othercomments:
            if comment.startswith(commentmarker):
                return True
        return False

    def settypecomment(self, typecomment, present=True):
        """Alters whether a given typecomment is present"""
        if self.hastypecomment(typecomment) == present:
            return
        if present:
            self.typecomments.append("#, %s\n" % typecomment)
        else:
            # this should handle word boundaries properly ...
            pattern = re.compile("\\b%s\\b[ \t,]*" % re.escape(typecomment))
            stripped = [pattern.sub("", tcline) for tcline in self.typecomments]
            # drop lines that are left with nothing but the "#," marker
            self.typecomments = [tcline for tcline in stripped if tcline.strip() != "#,"]

    def istranslated(self):
        """Return whether the unit is translated and not obsolete."""
        return super(pounit, self).istranslated() and not self.isobsolete()

    def istranslatable(self):
        """Return whether the unit is neither a header, blank nor obsolete."""
        return not (self.isheader() or self.isblank() or self.isobsolete())

    def _domarkfuzzy(self, present=True):
        """Do nothing; this implementation stores no fuzzy marker here."""
        pass

    def makeobsolete(self):
        """Makes this unit obsolete"""
        self.sourcecomments = []
        self.automaticcomments = []
        super(pounit, self).makeobsolete()

    def hasplural(self):
        """returns whether this pounit contains plural strings..."""
        source = self.source
        return isinstance(source, multistring) and len(source.strings) > 1

    def parse(self, src):
        """Deprecated: units can no longer parse themselves.

        @raise DeprecationWarning: always.
        """
        raise DeprecationWarning("Should not be parsing with a unit")

    def __str__(self):
        """convert to a string. double check that unicode is handled somehow here"""
        _cpo_unit = cpo.pounit.buildfromunit(self)
        return str(_cpo_unit)

    def getlocations(self):
        """Get a list of locations from sourcecomments in the PO unit

        rtype: List
        return: A list of the locations with '#: ' stripped

        """
        #TODO: rename to .locations
        return [urllib.unquote_plus(loc) for loc in self.sourcecomments]

    def addlocation(self, location):
        """Add a location to sourcecomments in the PO unit

        A location containing a space is URL-quoted first, so that it is
        stored as a single entry.

        @param location: Text location e.g. 'file.c:23' does not include #:
        @type location: String
        """
        if " " in location:
            location = urllib.quote_plus(location)
        self.sourcecomments.extend(location.split())

    def _extract_msgidcomments(self, text=None):
        """Extract KDE style msgid comments from the unit.

        @param text: If given (and non-empty), extract the comment from this
            text instead of returning the unit's own msgid comment.
        @rtype: String
        @return: Returns the extracted msgidcomments found in this unit's msgid.
        """
        if text:
            return pocommon.extract_msgid_comment(text)
        else:
            return self.msgidcomment

    def getcontext(self):
        """Get the message context."""
        return self._msgctxt + self.msgidcomment

    def setcontext(self, context):
        """Set the msgctxt; C{None} is stored as an empty unicode string."""
        context = data.forceunicode(context or u"")
        self._msgctxt = context

    def getid(self):
        """Returns a unique identifier for this unit."""
        context = self.getcontext()
        # Gettext does not consider the plural to determine duplicates, only
        # the msgid. For generation of .mo files, we might want to use this
        # code to generate the entry for the hash table, but for now, it is
        # commented out for conformance to gettext.
#        id = '\0'.join(self.source.strings)
        unit_id = self.source
        if self.msgidcomment:
            unit_id = u"_: %s\n%s" % (context, unit_id)
        elif context:
            unit_id = u"%s\04%s" % (context, unit_id)
        return unit_id

    def buildfromunit(cls, unit):
        """Build a native unit from a foreign unit, preserving as much
        information as possible."""
        # Exact type match on purpose: only units of precisely this class are
        # simply copied, everything else is rebuilt field by field.
        if type(unit) == cls and hasattr(unit, "copy") and callable(unit.copy):
            return unit.copy()
        elif isinstance(unit, pocommon.pounit):
            newunit = cls(unit.source)
            newunit.target = unit.target
            #context
            newunit.msgidcomment = unit._extract_msgidcomments()
            if not newunit.msgidcomment:
                newunit._msgctxt = unit.getcontext()

            locations = unit.getlocations()
            if locations:
                newunit.addlocations(locations)
            notes = unit.getnotes("developer")
            if notes:
                newunit.addnote(notes, "developer")
            notes = unit.getnotes("translator")
            if notes:
                newunit.addnote(notes, "translator")
            newunit.markfuzzy(unit.isfuzzy())
            if unit.isobsolete():
                newunit.makeobsolete()
            # only the first format flag found is carried over
            for tc in ['python-format', 'c-format', 'php-format']:
                if unit.hastypecomment(tc):
                    newunit.settypecomment(tc)
                    break
            return newunit
        else:
            return base.TranslationUnit.buildfromunit(unit)
    buildfromunit = classmethod(buildfromunit)
426 -class pofile(pocommon.pofile):
427 """A .po file containing various units""" 428 UnitClass = pounit 429
430 - def changeencoding(self, newencoding):
431 """Deprecated: changes the encoding on the file.""" 432 # This should not be here but in poheader. It also shouldn't mangle the 433 # header itself, but use poheader methods. All users are removed, so 434 # we can deprecate after one release. 435 raise DeprecationWarning 436 437 self._encoding = encodingToUse(newencoding) 438 if not self.units: 439 return 440 header = self.header() 441 if not header or header.isblank(): 442 return 443 charsetline = None 444 headerstr = header.target 445 for line in headerstr.split("\n"): 446 if not ":" in line: 447 continue 448 key, value = line.strip().split(":", 1) 449 if key.strip() != "Content-Type": 450 continue 451 charsetline = line 452 if charsetline is None: 453 headerstr += "Content-Type: text/plain; charset=%s" % self._encoding 454 else: 455 charset = re.search("charset=([^ ]*)", charsetline) 456 if charset is None: 457 newcharsetline = charsetline 458 if not newcharsetline.strip().endswith(";"): 459 newcharsetline += ";" 460 newcharsetline += " charset=%s" % self._encoding 461 else: 462 charset = charset.group(1) 463 newcharsetline = charsetline.replace("charset=%s" % charset, "charset=%s" % self._encoding, 1) 464 headerstr = headerstr.replace(charsetline, newcharsetline, 1) 465 header.target = headerstr
466
467 - def _build_self_from_cpo(self):
468 """Builds up this store from the internal cpo store. 469 470 A user must ensure that self._cpo_store already exists, and that it is 471 deleted afterwards.""" 472 for unit in self._cpo_store.units: 473 self.addunit(self.UnitClass.buildfromunit(unit)) 474 self._encoding = self._cpo_store._encoding
475
476 - def _build_cpo_from_self(self):
477 """Builds the internal cpo store from the data in self. 478 479 A user must ensure that self._cpo_store does not exist, and should 480 delete it after using it.""" 481 self._cpo_store = cpo.pofile(noheader=True) 482 for unit in self.units: 483 if not unit.isblank(): 484 self._cpo_store.addunit(cpo.pofile.UnitClass.buildfromunit(unit, self._encoding)) 485 if not self._cpo_store.header(): 486 #only add a temporary header 487 self._cpo_store.makeheader(charset=self._encoding, encoding="8bit")
488
489 - def parse(self, input):
490 """Parses the given file or file source string.""" 491 try: 492 if hasattr(input, 'name'): 493 self.filename = input.name 494 elif not getattr(self, 'filename', ''): 495 self.filename = '' 496 tmp_header_added = False 497 # if isinstance(input, str) and '"Content-Type: text/plain; charset=' not in input[:200]: 498 # input = basic_header + input 499 # tmp_header_added = True 500 self.units = [] 501 self._cpo_store = cpo.pofile(input, noheader=True) 502 self._build_self_from_cpo() 503 del self._cpo_store 504 if tmp_header_added: 505 self.units = self.units[1:] 506 except Exception, e: 507 raise base.ParseError(e)
508
509 - def removeduplicates(self, duplicatestyle="merge"):
510 """Make sure each msgid is unique ; merge comments etc from duplicates into original""" 511 # TODO: can we handle consecutive calls to removeduplicates()? What 512 # about files already containing msgctxt? - test 513 id_dict = {} 514 uniqueunits = [] 515 # TODO: this is using a list as the pos aren't hashable, but this is slow. 516 # probably not used frequently enough to worry about it, though. 517 markedpos = [] 518 519 def addcomment(thepo): 520 thepo.msgidcomment = " ".join(thepo.getlocations()) 521 markedpos.append(thepo)
522 for thepo in self.units: 523 id = thepo.getid() 524 if thepo.isheader() and not thepo.getlocations(): 525 # header msgids shouldn't be merged... 526 uniqueunits.append(thepo) 527 elif id in id_dict: 528 if duplicatestyle == "merge": 529 if id: 530 id_dict[id].merge(thepo) 531 else: 532 addcomment(thepo) 533 uniqueunits.append(thepo) 534 elif duplicatestyle == "msgctxt": 535 origpo = id_dict[id] 536 if origpo not in markedpos: 537 origpo._msgctxt += " ".join(origpo.getlocations()) 538 markedpos.append(thepo) 539 thepo._msgctxt += " ".join(thepo.getlocations()) 540 uniqueunits.append(thepo) 541 else: 542 if not id: 543 if duplicatestyle == "merge": 544 addcomment(thepo) 545 else: 546 thepo._msgctxt += u" ".join(thepo.getlocations()) 547 id_dict[id] = thepo 548 uniqueunits.append(thepo) 549 self.units = uniqueunits
550
551 - def __str__(self):
552 """Convert to a string. double check that unicode is handled somehow here""" 553 self._cpo_store = cpo.pofile(encoding=self._encoding, noheader=True) 554 try: 555 self._build_cpo_from_self() 556 except UnicodeEncodeError, e: 557 self._encoding = "utf-8" 558 self.updateheader(add=True, Content_Type="text/plain; charset=UTF-8") 559 self._build_cpo_from_self() 560 output = str(self._cpo_store) 561 del self._cpo_store 562 return output
563