Package translate :: Package storage :: Module fpo
[hide private]
[frames] | no frames]

Source Code for Module translate.storage.fpo

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2002-2009 Zuza Software Foundation 
  5  # 
  6  # This file is part of the Translate Toolkit. 
  7  # 
  8  # This program is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # This program is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program; if not, see <http://www.gnu.org/licenses/>. 
 20   
 21  """Classes for the support of Gettext .po and .pot files. 
 22   
 23  This implementation assumes that cpo is working. This should not be used 
 24  directly, but can be used once cpo has been established to work.""" 
 25   
 26  #TODO: 
 27  # - handle headerless PO files better 
 28  # - previous msgid and msgctxt 
 29  # - accept only unicodes everywhere 
 30   
 31  from translate.misc.multistring import multistring 
 32  from translate.lang import data 
 33  from translate.storage import pocommon, base, cpo, poparser 
 34  from translate.storage.pocommon import encodingToUse 
 35  import re 
 36  import copy 
 37  import cStringIO 
 38   
 39  lsep = " " 
     # lsep is a single space; per the string below it is the separator
     # used between individual "#:" (source location) entries.
 40  """Seperator for #: entries""" 
 41   
     # basic_header is a raw string holding an empty msgid/msgstr pair
     # followed by two header fields: a Content-Type declaring a UTF-8
     # charset and a Content-Transfer-Encoding of 8bit.  Being a raw
     # string, each "\n" below is a literal backslash followed by "n".
 42  basic_header = r'''msgid "" 
 43  msgstr "" 
 44  "Content-Type: text/plain; charset=UTF-8\n" 
 45  "Content-Transfer-Encoding: 8bit\n" 
 46  ''' 
 47   
class pounit(pocommon.pounit):
    """A single PO entry kept in plain Python attributes.

    Source, target, context and the different kinds of comments are stored
    directly on the instance.  Conversion to the textual PO form is
    delegated to C{cpo} (see L{__str__}).
    """
    # Overview of the data a PO entry can carry:
    # othercomments = []      #   # this is another comment
    # automaticcomments = []  #   #. comment extracted from the source code
    # sourcecomments = []     #   #: sourcefile.xxx:35
    # prev_msgctxt = []       #   #| The previous values that msgctxt and msgid held
    # prev_msgid = []         #
    # prev_msgid_plural = []  #
    # typecomments = []       #   #, fuzzy
    # msgidcomment = u""      #   _: within msgid
    # msgctxt
    # msgid = []
    # msgstr = []

    # Our homegrown way to indicate what must be copied in a shallow
    # fashion
    __shallow__ = ['_store']

    def __init__(self, source=None, encoding="UTF-8"):
        """Create a unit for the given source text.

        @param source: The msgid text (may be None for an empty unit).
        @param encoding: Encoding name; normalised through encodingToUse().
        """
        pocommon.pounit.__init__(self, source)
        self._encoding = encodingToUse(encoding)
        self.obsolete = False
        self._initallcomments(blankall=True)
        self._msgctxt = u""

        # Goes through settarget(), which needs the source to be in place.
        self.target = u""

    def _initallcomments(self, blankall=False):
        """Initialises allcomments

        Nothing is touched unless C{blankall} is true, in which case every
        comment list and the msgid comment are reset to empty values.
        """
        if blankall:
            self.othercomments = []
            self.automaticcomments = []
            self.sourcecomments = []
            self.typecomments = []
            self.msgidcomment = u""

    def getsource(self):
        """Returns the stored msgid."""
        return self._source

    def setsource(self, source):
        """Sets the msgid, converting it to unicode or a multistring.

        A false value (None, empty string, ...) is stored as C{u""}.
        """
        self._rich_source = None
#        assert isinstance(source, unicode)
        source = data.forceunicode(source or u"")
        source = source or u""
        if isinstance(source, (multistring, unicode)):
            self._source = source
        else:
            #unicode, list, dict
            self._source = multistring(source)
    source = property(getsource, setsource)

    def gettarget(self):
        """Returns the unescaped msgstr"""
        return self._target

    def settarget(self, target):
        """Sets the msgstr to the given (unescaped) value

        For a plural unit the value is stored as a multistring.  For a
        singular unit a one-element list/dict is unwrapped.

        @raise ValueError: If the unit has no plural but a list/dict whose
        length is not exactly one is given.
        """
        self._rich_target = None
#        assert isinstance(target, unicode)
#        target = data.forceunicode(target)
        if self.hasplural():
            if isinstance(target, multistring):
                self._target = target
            else:
                #unicode, list, dict
                self._target = multistring(target)
        elif isinstance(target, (dict, list)):
            if len(target) == 1:
                self._target = target[0]
            else:
                raise ValueError("po msgid element has no plural but msgstr has %d elements (%s)" % (len(target), target))
        else:
            self._target = target
    target = property(gettarget, settarget)

    def getnotes(self, origin=None):
        """Return comments based on origin value (programmer, developer, source code and translator)

        With C{origin=None} the translator comments are followed directly
        by the automatic comments; no separator is inserted between the two
        groups.

        @raise ValueError: If C{origin} is not one of the known values.
        """
        if origin is None:
            comments = u"\n".join(self.othercomments)
            comments += u"\n".join(self.automaticcomments)
        elif origin == "translator":
            comments = u"\n".join(self.othercomments)
        elif origin in ["programmer", "developer", "source code"]:
            comments = u"\n".join(self.automaticcomments)
        else:
            raise ValueError("Comment type not valid")
        return comments

    def addnote(self, text, origin=None, position="append"):
        """This is modeled on the XLIFF method. See xliff.py::xliffunit.addnote

        @param text: Note text; split into one comment per line.
        @param origin: "programmer", "developer" or "source code" select the
        automatic comments; anything else selects the translator comments.
        @param position: "append" or "prepend".  Any other value makes the
        new lines replace the existing comments of that kind.
        """
        # ignore empty strings and strings without non-space characters
        if not (text and text.strip()):
            return
        text = data.forceunicode(text)
        autocomments = origin in ["programmer", "developer", "source code"]
        if autocomments:
            commentlist = self.automaticcomments
        else:
            commentlist = self.othercomments
        # A single trailing newline must not produce an empty last comment.
        if text.endswith(u'\n'):
            text = text[:-1]
        newcomments = text.split(u"\n")
        if position == "append":
            newcomments = commentlist + newcomments
        elif position == "prepend":
            newcomments = newcomments + commentlist

        if autocomments:
            self.automaticcomments = newcomments
        else:
            self.othercomments = newcomments

    def removenotes(self):
        """Remove all the translator's notes (other comments)"""
        self.othercomments = []

    def __deepcopy__(self, memo=None):
        """Copy the unit, sharing only the attributes named in __shallow__.

        @param memo: The memo dictionary supplied by copy.deepcopy().  A
        fresh dictionary is used when it is omitted, so that no state is
        shared (and no unit kept alive) between unrelated calls.
        """
        if memo is None:
            memo = {}
        # Make an instance to serve as the copy
        new_unit = self.__class__()
        # We'll be testing membership frequently, so make a set from
        # self.__shallow__
        shallow = set(self.__shallow__)
        # Make deep copies of all members which are not in shallow
        for key, value in self.__dict__.iteritems():
            if key not in shallow:
                setattr(new_unit, key, copy.deepcopy(value))
        # Make shallow copies of all members which are in shallow
        for key in shallow:
            setattr(new_unit, key, getattr(self, key))
        # Mark memo with ourself, so that we won't get deep copied
        # again
        memo[id(self)] = self
        # Return our copied unit
        return new_unit

    def copy(self):
        """Returns a copy of this unit (see L{__deepcopy__})."""
        return copy.deepcopy(self)

    def _msgidlen(self):
        """Returns the length of the msgid; plural forms are added up."""
        if self.hasplural():
            return len("".join(self.source.strings))
        return len(self.source)

    def _msgstrlen(self):
        """Returns the length of the msgstr; plural forms are added up."""
        target = self.target
        # The target is only a multistring if it was assigned while the
        # source was already plural, so check before reading .strings.
        if self.hasplural() and isinstance(target, multistring):
            return len("".join(target.strings))
        return len(target)

    def merge(self, otherpo, overwrite=False, comments=True, authoritative=False):
        """Merges the otherpo (with the same msgid) into this one.

        Overwrite non-blank self.msgstr only if overwrite is True
        merge comments only if comments is True

        @param otherpo: The unit to merge from.  If it is not a pounit the
        merge is handed to the parent class.  Note that KDE style comments
        found in its target are removed from C{otherpo} itself.
        @param authoritative: If true, the automatic comments of C{otherpo}
        are not brought across.
        """

        def mergelists(list1, list2, split=False):
            """Extends list1 in place with the entries missing from list2."""
            #Split if directed to do so:
            if split:
                splitlist1 = []
                splitlist2 = []
                for item in list1:
                    splitlist1.extend(item.split())
                for item in list2:
                    splitlist2.extend(item.split())
                list1.extend([item for item in splitlist2 if not item in splitlist1])
            else:
                #Normal merge, but conform to list1 newline style
                if list1 != list2:
                    for item in list2:
                        # avoid duplicate comment lines (this might cause some problems)
                        if item not in list1 or len(item) < 5:
                            list1.append(item)

        if not isinstance(otherpo, pounit):
            super(pounit, self).merge(otherpo, overwrite, comments)
            return
        if comments:
            mergelists(self.othercomments, otherpo.othercomments)
            mergelists(self.typecomments, otherpo.typecomments)
            if not authoritative:
                # We don't bring across otherpo.automaticcomments as we consider ourself
                # to be the the authority.  Same applies to otherpo.msgidcomments
                mergelists(self.automaticcomments, otherpo.automaticcomments)
#                mergelists(self.msgidcomments, otherpo.msgidcomments) #XXX?
            mergelists(self.sourcecomments, otherpo.sourcecomments, split=True)
        if not self.istranslated() or overwrite:
            # Remove kde-style comments from the translation (if any). XXX - remove
            if pocommon.extract_msgid_comment(otherpo.target):
                otherpo.target = otherpo.target.replace('_: ' + otherpo._extract_msgidcomments()+ '\n', '')
            self.target = otherpo.target
            if self.source != otherpo.source or self.getcontext() != otherpo.getcontext():
                self.markfuzzy()
            else:
                self.markfuzzy(otherpo.isfuzzy())
        elif not otherpo.istranslated():
            if self.source != otherpo.source:
                self.markfuzzy()
        else:
            if self.target != otherpo.target:
                self.markfuzzy()

    def isheader(self):
        """Returns whether the unit has an empty id but a non-empty target."""
        #TODO: fix up nicely
        return not self.getid() and len(self.target) > 0

    def isblank(self):
        """Returns whether msgid, msgstr and msgctxt are all empty.

        Headers and units carrying a msgid comment are never blank.
        """
        if self.isheader() or self.msgidcomment:
            return False
        return (self._msgidlen() == 0 and self._msgstrlen() == 0
                and len(self._msgctxt) == 0)

    def hastypecomment(self, typecomment):
        """Check whether the given type comment is present"""
        # check for word boundaries properly by using a regular expression...
        pattern = "\\b%s\\b" % typecomment
        for tcline in self.typecomments:
            if re.search(pattern, tcline):
                return True
        return False

    def hasmarkedcomment(self, commentmarker):
        """Check whether the given comment marker is present as # (commentmarker) ..."""
#        raise DeprecationWarning
        commentmarker = "(%s)" % commentmarker
        for comment in self.othercomments:
            if comment.startswith(commentmarker):
                return True
        return False

    def settypecomment(self, typecomment, present=True):
        """Alters whether a given typecomment is present"""
        if self.hastypecomment(typecomment) != present:
            if present:
                self.typecomments.append("#, %s\n" % typecomment)
            else:
                # this should handle word boundaries properly ...
                pattern = "\\b%s\\b[ \t,]*" % typecomment
                stripped = [re.sub(pattern, "", tcline) for tcline in self.typecomments]
                # Drop the lines that are left with nothing but the marker.
                self.typecomments = [tcline for tcline in stripped if tcline.strip() != "#,"]

    def istranslated(self):
        """Returns whether the parent class considers the unit translated
        and the unit is not obsolete."""
        return super(pounit, self).istranslated() and not self.isobsolete()

    def istranslatable(self):
        """Returns whether the unit is neither header, blank nor obsolete."""
        return not (self.isheader() or self.isblank() or self.obsolete)

    def isfuzzy(self):
        """Returns whether the "fuzzy" type comment is present."""
        return self.hastypecomment("fuzzy")

    def markfuzzy(self, present=True):
        """Adds (or removes, if C{present} is false) the "fuzzy" flag."""
        self.settypecomment("fuzzy", present)

    def isobsolete(self):
        """Returns the obsolete flag of the unit."""
        return self.obsolete

    def makeobsolete(self):
        """Makes this unit obsolete"""
        self.obsolete = True
        self.sourcecomments = []
        self.automaticcomments = []

    def resurrect(self):
        """Makes an obsolete unit normal"""
        self.obsolete = False

    def hasplural(self):
        """returns whether this pounit contains plural strings..."""
        source = self.source
        return isinstance(source, multistring) and len(source.strings) > 1

    def parse(self, src):
        """Deprecated: units can not parse themselves.

        @raise DeprecationWarning: Always.
        """
        raise DeprecationWarning("Should not be parsing with a unit")

    def __str__(self):
        """convert to a string. double check that unicode is handled somehow here"""
        _cpo_unit = cpo.pounit.buildfromunit(self)
        return str(_cpo_unit)

    def getlocations(self):
        """Get a list of locations from sourcecomments in the PO unit

        rtype: List
        return: A list of the locations with '#: ' stripped

        """
        #TODO: rename to .locations
        return self.sourcecomments

    def addlocation(self, location):
        """Add a location to sourcecomments in the PO unit

        @param location: Text location e.g. 'file.c:23' does not include #:
        @type location: String
        """
        # Whitespace separated locations are stored as separate entries.
        self.sourcecomments.extend(location.split())

    def _extract_msgidcomments(self, text=None):
        """Extract KDE style msgid comments from the unit.

        @param text: If given (and non-empty), the comment is extracted from
        this text instead of being taken from the unit.
        @rtype: String
        @return: Returns the extracted msgidcomments found in this unit's msgid.
        """
        if text:
            return pocommon.extract_msgid_comment(text)
        else:
            return self.msgidcomment

    def getcontext(self):
        """Get the message context."""
        return self._msgctxt + self.msgidcomment

    def getid(self):
        """Returns a unique identifier for this unit."""
        context = self.getcontext()
        # Gettext does not consider the plural to determine duplicates, only
        # the msgid. For generation of .mo files, we might want to use this
        # code to generate the entry for the hash table, but for now, it is
        # commented out for conformance to gettext.
#        unit_id = '\0'.join(self.source.strings)
        unit_id = self.source
        if self.msgidcomment:
            unit_id = u"_: %s\n%s" % (context, unit_id)
        elif context:
            unit_id = u"%s\04%s" % (context, unit_id)
        return unit_id

    @classmethod
    def buildfromunit(cls, unit):
        """Build a native unit from a foreign unit, preserving as much
        information as possible."""
        # Only an exact type match is copied directly.
        if type(unit) == cls and hasattr(unit, "copy") and callable(unit.copy):
            return unit.copy()
        elif isinstance(unit, pocommon.pounit):
            newunit = cls(unit.source)
            newunit.target = unit.target
            #context
            newunit.msgidcomment = unit._extract_msgidcomments()
            if not newunit.msgidcomment:
                newunit._msgctxt = unit.getcontext()

            locations = unit.getlocations()
            if locations:
                newunit.addlocations(locations)
            notes = unit.getnotes("developer")
            if notes:
                newunit.addnote(notes, "developer")
            notes = unit.getnotes("translator")
            if notes:
                newunit.addnote(notes, "translator")
            if unit.isobsolete():
                newunit.makeobsolete()
            newunit.markfuzzy(unit.isfuzzy())
            # Only the first format flag found is carried over.
            for tc in ['python-format', 'c-format', 'php-format']:
                if unit.hastypecomment(tc):
                    newunit.settypecomment(tc)
                    break
            return newunit
        else:
            return base.TranslationUnit.buildfromunit(unit)
class pofile(pocommon.pofile):
    """A .po file containing various units"""
    UnitClass = pounit

    def changeencoding(self, newencoding):
        """Deprecated: changes the encoding on the file.

        @raise DeprecationWarning: Always.
        """
        # This should not be here but in poheader. It also shouldn't mangle the
        # header itself, but use poheader methods. All users are removed, so
        # we can deprecate after one release.
        #
        # The header rewriting code that used to follow the raise could never
        # run and has therefore been dropped.
        raise DeprecationWarning

    def _build_self_from_cpo(self):
        """Builds up this store from the internal cpo store.

        A user must ensure that self._cpo_store already exists, and that it is
        deleted afterwards."""
        for unit in self._cpo_store.units:
            self.addunit(self.UnitClass.buildfromunit(unit))
        self._encoding = self._cpo_store._encoding

    def _build_cpo_from_self(self):
        """Builds the internal cpo store from the data in self.

        A user must ensure that self._cpo_store does not exist, and should
        delete it after using it."""
        self._cpo_store = cpo.pofile()
        for unit in self.units:
            if not unit.isblank():
                self._cpo_store.addunit(cpo.pofile.UnitClass.buildfromunit(unit))
        if not self._cpo_store.header():
            #only add a temporary header
            self._cpo_store.makeheader(charset="utf-8", encoding="8bit")

    def parse(self, input):
        """Parses the given file or file source string.

        The input is read by a temporary cpo store whose units are then
        converted into units of this store.

        @raise base.ParseError: Wraps any exception raised while parsing.
        """
        import sys
        try:
            if hasattr(input, 'name'):
                self.filename = input.name
            elif not getattr(self, 'filename', ''):
                self.filename = ''
            self.units = []
            self._cpo_store = cpo.pofile(input)
            try:
                self._build_self_from_cpo()
            finally:
                # Never leave the temporary store behind, even on failure.
                del self._cpo_store
        except Exception:
            # sys.exc_info() avoids version-specific "except ..." syntax.
            raise base.ParseError(sys.exc_info()[1])

    def removeduplicates(self, duplicatestyle="merge"):
        """Make sure each msgid is unique ; merge comments etc from duplicates into original

        @param duplicatestyle: "merge" merges duplicates into the first unit
        with that id; "msgctxt" keeps all units and adds their locations
        to their message context to tell them apart.
        """
        # TODO: can we handle consecutive calls to removeduplicates()? What
        # about files already containing msgctxt? - test
        id_dict = {}
        uniqueunits = []
        # TODO: this is using a list as the pos aren't hashable, but this is slow.
        # probably not used frequently enough to worry about it, though.
        markedpos = []

        def addcomment(thepo):
            thepo.msgidcomment = " ".join(thepo.getlocations())
            markedpos.append(thepo)

        for thepo in self.units:
            unit_id = thepo.getid()
            if thepo.isheader() and not thepo.getlocations():
                # header msgids shouldn't be merged...
                uniqueunits.append(thepo)
            elif unit_id in id_dict:
                if duplicatestyle == "merge":
                    if unit_id:
                        id_dict[unit_id].merge(thepo)
                    else:
                        addcomment(thepo)
                        uniqueunits.append(thepo)
                elif duplicatestyle == "msgctxt":
                    origpo = id_dict[unit_id]
                    if origpo not in markedpos:
                        origpo._msgctxt += " ".join(origpo.getlocations())
                        # Remember the original, so that further duplicates
                        # don't add its locations to its context again.
                        markedpos.append(origpo)
                    thepo._msgctxt += " ".join(thepo.getlocations())
                    uniqueunits.append(thepo)
            else:
                if not unit_id:
                    if duplicatestyle == "merge":
                        addcomment(thepo)
                    else:
                        thepo._msgctxt += u" ".join(thepo.getlocations())
                id_dict[unit_id] = thepo
                uniqueunits.append(thepo)
        self.units = uniqueunits

    def __str__(self):
        """Convert to a string. double check that unicode is handled somehow here"""
        # _build_cpo_from_self() creates self._cpo_store itself.
        self._build_cpo_from_self()
        try:
            return str(self._cpo_store)
        finally:
            del self._cpo_store
541