Package translate :: Package storage :: Module pypo
[hide private]
[frames | no frames]

Source Code for Module translate.storage.pypo

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2002-2009 Zuza Software Foundation 
  5  # 
  6  # This file is part of the Translate Toolkit. 
  7  # 
  8  # This program is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # This program is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program; if not, see <http://www.gnu.org/licenses/>. 
 20   
 21  """classes that hold units of .po files (pounit) or entire files (pofile) 
 22  gettext-style .po (or .pot) files are used in translations for KDE et al (see kbabel)""" 
 23   
 24  from __future__ import generators 
 25  from translate.misc.multistring import multistring 
 26  from translate.misc import quote 
 27  from translate.misc import textwrap 
 28  from translate.lang import data 
 29  from translate.storage import pocommon, base, poparser 
 30  from translate.storage.pocommon import encodingToUse 
 31  import re 
 32  import copy 
 33  import cStringIO 
 34   
 35  lsep = "\n#: " 
 36  """Seperator for #: entries""" 
 37   
 38  # general functions for quoting / unquoting po strings 
 39   
 40  po_unescape_map = {"\\r": "\r", "\\t": "\t", '\\"': '"', '\\n': '\n', '\\\\': '\\'} 
 41  po_escape_map = dict([(value, key) for (key, value) in po_unescape_map.items()]) 
 42   
def escapeforpo(line):
    """Escape a single line of text for use inside a PO string.

    Every character that is a key of C{po_escape_map} is replaced by its
    escape sequence.  The text is assumed not to span several lines.

    @param line: unescaped text
    @return: the text with all special characters escaped
    """
    # Collect the (unique) positions of every character needing an escape.
    hits = set()
    for needle in po_escape_map:
        hits.update(quote.find_all(line, needle))

    # Rebuild the line, swapping each special character for its escape.
    pieces = []
    cursor = 0
    for hit in sorted(hits):
        pieces.append(line[cursor:hit])
        pieces.append(po_escape_map[line[hit:hit + 1]])
        cursor = hit + 1
    pieces.append(line[cursor:])
    return "".join(pieces)
61
def unescapehandler(escape):
    """Translate one escape sequence into the character it represents.

    @param escape: an escape sequence as it appears in a PO string
    @return: the matching entry of C{po_unescape_map}, or C{escape} itself
        when the sequence is not a known one
    """
    if escape in po_unescape_map:
        return po_unescape_map[escape]
    return escape
65
def wrapline(line):
    """Wrap text for po files.

    The text is wrapped at 76 columns without touching its whitespace.
    A space that ends up at the start of a continuation line is moved to
    the end of the line before it.

    @param line: the text to wrap
    @return: the list of wrapped lines
    """
    wrapped = textwrap.wrap(line, 76, replace_whitespace=False,
                            expand_tabs=False, drop_whitespace=False)

    # Lines should not start with a space...
    for pos in range(1, len(wrapped)):
        current = wrapped[pos]
        if current.startswith(' '):
            # ...so hand the leading space over to the previous line.
            wrapped[pos] = current[1:]
            wrapped[pos - 1] += ' '
    return wrapped
80
def quoteforpo(text):
    """Quote the given text for a PO file.

    @param text: unescaped text, or None
    @return: a list of quoted and escaped PO lines (empty for None)
    """
    quoted = []
    if text is None:
        return quoted

    segments = text.split("\n")
    final_segment = segments[-1]

    # Text with embedded newlines, or a single line longer than 71
    # characters, is written in multi-line form.
    if len(segments) > 1 or len(segments[0]) > 71:
        # The blank leader line is left out only when the text is a
        # single line terminated by a newline.
        if len(segments) != 2 or segments[1]:
            quoted.append('""')
        for segment in segments[:-1]:
            #TODO: We should only wrap after escaping
            wrapped = wrapline(segment)
            if not wrapped:
                # an empty line contributes just its newline
                quoted.append('"\\n"')
                continue
            for part in wrapped[:-1]:
                quoted.append('"' + escapeforpo(part) + '"')
            if wrapped[-1]:
                quoted.append('"' + escapeforpo(wrapped[-1]) + '\\n"')

    # Whatever follows the last newline (or the whole text, if there is
    # none) is emitted without a trailing newline escape.
    if final_segment:
        for part in wrapline(final_segment):
            quoted.append('"' + escapeforpo(part) + '"')
    return quoted
103
def extractpoline(line):
    """Strip the quotes from a PO line and unescape its contents.

    @param line: a quoted line from a po file (msgid or msgstr)
    @return: the first element of the result of
        C{quote.extractwithoutquotes}, with escapes resolved through
        L{unescapehandler}
    """
    result = quote.extractwithoutquotes(
        line, '"', '"', '\\', includeescapes=unescapehandler)
    return result[0]
111
def unquotefrompo(postr):
    """Join a sequence of quoted PO lines into one unquoted unicode string.

    @param postr: an iterable of quoted PO lines
    @return: the concatenation of every line after L{extractpoline}
    """
    pieces = []
    for quoted_line in postr:
        pieces.append(extractpoline(quoted_line))
    return u"".join(pieces)
114
def is_null(lst):
    """Tell whether a list of quoted PO lines holds no text.

    @param lst: a list of quoted PO lines
    @return: True for an empty list or a list holding only C{'""'}
    """
    if lst == []:
        return True
    return len(lst) == 1 and lst[0] == '""'
117
def extractstr(string):
    """Return the part of C{string} running from its first double quote.

    When a closing quote exists the result ends at the last double quote;
    otherwise a closing quote is appended to the remainder of the string.

    @param string: text expected to contain a quoted section
    @return: the quoted section, quotes included
    """
    first_quote = string.find('"')
    last_quote = string.rfind('"')
    if last_quote == -1:
        return string[first_quote:] + '"'
    return string[first_quote:last_quote + 1]
125
class pounit(pocommon.pounit):
    """A single unit (entry) of a gettext .po file.

    The msgctxt / msgid / msgid_plural parts and the comments are kept as
    lists of PO lines; msgstr is a list of lines, or a dict of such lists
    keyed by plural index.
    """
    # othercomments = []      #   # this is another comment
    # automaticcomments = []  #   #. comment extracted from the source code
    # sourcecomments = []     #   #: sourcefile.xxx:35
    # prev_msgctxt = []       #   #| The previous values that msgctxt and msgid held
    # prev_msgid = []         #
    # prev_msgid_plural = []  #
    # typecomments = []       #   #, fuzzy
    # msgidcomments = []      #   _: within msgid
    # msgctxt
    # msgid = []
    # msgstr = []

    # Our homegrown way to indicate what must be copied in a shallow
    # fashion
    __shallow__ = ['_store']

    def __init__(self, source=None, encoding="UTF-8"):
        """Create an empty unit and hand C{source} to the base class.

        @param source: passed unchanged to C{pocommon.pounit.__init__}
        @param encoding: encoding name, normalised with C{encodingToUse}
        """
        self._encoding = encodingToUse(encoding)
        self.obsolete = False
        self._initallcomments(blankall=True)
        self.prev_msgctxt = []
        self.prev_msgid = []
        self.prev_msgid_plural = []
        self.msgctxt = []
        self.msgid = []
        self.msgid_pluralcomments = []
        self.msgid_plural = []
        self.msgstr = []
        self.obsoletemsgctxt = []
        self.obsoletemsgid = []
        self.obsoletemsgid_pluralcomments = []
        self.obsoletemsgid_plural = []
        self.obsoletemsgstr = []
        pocommon.pounit.__init__(self, source)

    def _initallcomments(self, blankall=False):
        """Initialises allcomments

        @param blankall: when true, every comment list is reset to empty;
            otherwise nothing is done
        """
        if blankall:
            self.othercomments = []
            self.automaticcomments = []
            self.sourcecomments = []
            self.typecomments = []
            self.msgidcomments = []
            self.obsoletemsgidcomments = []

    def _get_all_comments(self):
        """Return the comment lists themselves (not copies), in a fixed order."""
        return [self.othercomments,
                self.automaticcomments,
                self.sourcecomments,
                self.typecomments,
                self.msgidcomments,
                self.obsoletemsgidcomments]

    allcomments = property(_get_all_comments)

    def _get_source_vars(self, msgid, msgid_plural):
        """Build a multistring from quoted msgid / msgid_plural lines.

        The plural is only included when this unit reports having one
        (see L{hasplural}).
        """
        multi = multistring(unquotefrompo(msgid), self._encoding)
        if self.hasplural():
            pluralform = unquotefrompo(msgid_plural)
            if isinstance(pluralform, str):
                pluralform = pluralform.decode(self._encoding)
            multi.strings.append(pluralform)
        return multi

    def _set_source_vars(self, source):
        """Quote an unescaped source into (msgid, msgid_plural) line lists.

        @param source: a string, a multistring or a list of strings; for a
            list the first item is the singular and the second the plural
        @return: a tuple C{(msgid, msgid_plural)} of quoted PO lines
        """
        msgid = None
        msgid_plural = None
        if isinstance(source, str):
            source = source.decode(self._encoding)
        if isinstance(source, multistring):
            source = source.strings
        if isinstance(source, list):
            msgid = quoteforpo(source[0])
            if len(source) > 1:
                msgid_plural = quoteforpo(source[1])
            else:
                msgid_plural = []
        else:
            msgid = quoteforpo(source)
            msgid_plural = []
        return msgid, msgid_plural

    def getsource(self):
        """Returns the unescaped msgid"""
        return self._get_source_vars(self.msgid, self.msgid_plural)

    def setsource(self, source):
        """Sets the msgid to the given (unescaped) value.

        @param source: an unescaped source string.
        """
        self._rich_source = None
        self.msgid, self.msgid_plural = self._set_source_vars(source)
    source = property(getsource, setsource)

    def _get_prev_source(self):
        """Returns the unescaped previous msgid (the C{#|} lines)"""
        return self._get_source_vars(self.prev_msgid, self.prev_msgid_plural)

    def _set_prev_source(self, source):
        """Sets the previous msgid to the given (unescaped) value.

        @param source: an unescaped source string.
        """
        self.prev_msgid, self.prev_msgid_plural = self._set_source_vars(source)
    prev_source = property(_get_prev_source, _set_prev_source)

    def gettarget(self):
        """Returns the unescaped msgstr"""
        if isinstance(self.msgstr, dict):
            multi = multistring(map(unquotefrompo, self.msgstr.values()), self._encoding)
        else:
            multi = multistring(unquotefrompo(self.msgstr), self._encoding)
        return multi

    def settarget(self, target):
        """Sets the msgstr to the given (unescaped) value

        @param target: a string, multistring, list or dict of strings
        @raise ValueError: if the unit has no plural but a list or dict
            with other than one element is given
        """
        self._rich_target = None
        if isinstance(target, str):
            target = target.decode(self._encoding)
        if self.hasplural():
            # the multistring test must come first: the plain-string branch
            # would otherwise wrap it in a one-element list
            if isinstance(target, multistring):
                target = target.strings
            elif isinstance(target, basestring):
                target = [target]
        elif isinstance(target, (dict, list)):
            if len(target) == 1:
                target = target[0]
            else:
                raise ValueError("po msgid element has no plural but msgstr has %d elements (%s)" % (len(target), target))
        if isinstance(target, list):
            self.msgstr = dict((index, quoteforpo(item)) for index, item in enumerate(target))
        elif isinstance(target, dict):
            self.msgstr = dict((index, quoteforpo(item)) for index, item in target.items())
        else:
            self.msgstr = quoteforpo(target)
    target = property(gettarget, settarget)

    def getnotes(self, origin=None):
        """Return comments based on origin value (programmer, developer, source code and translator)

        @param origin: None for translator and automatic comments together,
            "translator" for the other comments, or one of "programmer",
            "developer", "source code" for the automatic comments
        @raise ValueError: for any other origin
        """
        if origin is None:
            comments = u"".join([comment[2:] for comment in self.othercomments])
            comments += u"".join([comment[3:] for comment in self.automaticcomments])
        elif origin == "translator":
            comments = u"".join([comment[2:] for comment in self.othercomments])
        elif origin in ["programmer", "developer", "source code"]:
            comments = u"".join([comment[3:] for comment in self.automaticcomments])
        else:
            raise ValueError("Comment type not valid")
        # Let's drop the last newline
        return comments[:-1]

    def addnote(self, text, origin=None, position="append"):
        """This is modeled on the XLIFF method. See xliff.py::xliffunit.addnote

        @param text: the note; each of its lines becomes a comment line
        @param origin: "programmer", "developer" or "source code" stores
            the note as automatic ("#. ") comments; anything else stores it
            as other ("# ") comments
        @param position: "append" or "prepend" relative to the existing
            comments; with any other value the existing comments of that
            kind are replaced
        """
        # ignore empty strings and strings without non-space characters
        if not (text and text.strip()):
            return
        text = data.forceunicode(text)
        commentlist = self.othercomments
        linestart = "# "
        autocomments = False
        if origin in ["programmer", "developer", "source code"]:
            autocomments = True
            commentlist = self.automaticcomments
            linestart = "#. "
        text = text.split("\n")
        newcomments = [linestart + line + "\n" for line in text]
        if position == "append":
            newcomments = commentlist + newcomments
        elif position == "prepend":
            newcomments = newcomments + commentlist

        if autocomments:
            self.automaticcomments = newcomments
        else:
            self.othercomments = newcomments

    def removenotes(self):
        """Remove all the translator's notes (other comments)"""
        self.othercomments = []

    def __deepcopy__(self, memo=None):
        """Copy the unit deeply, except for the members in C{__shallow__}.

        @param memo: the memo dictionary of the C{copy.deepcopy} protocol;
            a fresh one is created when none is supplied
        """
        # A mutable default would be shared (and grow) across calls.
        if memo is None:
            memo = {}
        # Make an instance to serve as the copy
        new_unit = self.__class__()
        # Register the copy before descending, so that any reference back
        # to this unit resolves to the copy instead of being copied again
        memo[id(self)] = new_unit
        # We'll be testing membership frequently, so make a set from
        # self.__shallow__
        shallow = set(self.__shallow__)
        # Make deep copies of all members which are not in shallow
        for key, value in self.__dict__.items():
            if key not in shallow:
                setattr(new_unit, key, copy.deepcopy(value, memo))
        # Make shallow copies of all members which are in shallow
        for key in shallow:
            setattr(new_unit, key, getattr(self, key))
        # Return our copied unit
        return new_unit

    def copy(self):
        """Return a deep copy of this unit (see L{__deepcopy__})."""
        return copy.deepcopy(self)

    def _msgidlen(self):
        """Length of the unquoted msgid, plus the plural's if there is one."""
        if self.hasplural():
            return len(unquotefrompo(self.msgid)) + len(unquotefrompo(self.msgid_plural))
        else:
            return len(unquotefrompo(self.msgid))

    def _msgstrlen(self):
        """Length of the unquoted msgstr; plural forms are joined by newlines."""
        if isinstance(self.msgstr, dict):
            combinedstr = "\n".join([unquotefrompo(msgstr) for msgstr in self.msgstr.values()])
            return len(combinedstr)
        else:
            return len(unquotefrompo(self.msgstr))

    def merge(self, otherpo, overwrite=False, comments=True, authoritative=False):
        """Merges the otherpo (with the same msgid) into this one.

        Overwrite non-blank self.msgstr only if overwrite is True
        merge comments only if comments is True

        @param otherpo: the unit to merge from; anything that is not a
            pounit is delegated to the base class merge
        @param authoritative: when true, the automatic, msgid and source
            comments of otherpo are not brought across
        """

        def mergelists(list1, list2, split=False):
            """Merge the comment lines of list2 into list1, in place."""
            #decode where necessary
            if unicode in [type(item) for item in list2] + [type(item) for item in list1]:
                for position, item in enumerate(list1):
                    if isinstance(item, str):
                        list1[position] = item.decode("utf-8")
                for position, item in enumerate(list2):
                    if isinstance(item, str):
                        list2[position] = item.decode("utf-8")

            #Determine the newline style of list1
            lineend = ""
            if list1 and list1[0]:
                # NOTE(review): the last candidate is "\n\r", not "\r\n", so
                # a CRLF line is detected as ending in "\n" - confirm intent.
                for candidate in ["\n", "\r", "\n\r"]:
                    if list1[0].endswith(candidate):
                        lineend = candidate
                if not lineend:
                    lineend = ""
            else:
                lineend = "\n"

            #Split if directed to do so:
            if split:
                splitlist1 = []
                splitlist2 = []
                prefix = "#"
                for item in list1:
                    splitlist1.extend(item.split()[1:])
                    prefix = item.split()[0]
                for item in list2:
                    splitlist2.extend(item.split()[1:])
                    prefix = item.split()[0]
                list1.extend(["%s %s%s" % (prefix, item, lineend) for item in splitlist2 if not item in splitlist1])
            else:
                #Normal merge, but conform to list1 newline style
                if list1 != list2:
                    for item in list2:
                        if lineend:
                            item = item.rstrip() + lineend
                        # avoid duplicate comment lines (this might cause some problems)
                        if item not in list1 or len(item) < 5:
                            list1.append(item)

        if not isinstance(otherpo, pounit):
            super(pounit, self).merge(otherpo, overwrite, comments)
            return
        if comments:
            mergelists(self.othercomments, otherpo.othercomments)
            mergelists(self.typecomments, otherpo.typecomments)
            if not authoritative:
                # We don't bring across otherpo.automaticcomments as we consider ourself
                # to be the authority.  Same applies to otherpo.msgidcomments
                mergelists(self.automaticcomments, otherpo.automaticcomments)
                mergelists(self.msgidcomments, otherpo.msgidcomments)
                mergelists(self.sourcecomments, otherpo.sourcecomments, split=True)
        if not self.istranslated() or overwrite:
            # Remove kde-style comments from the translation (if any).
            if self._extract_msgidcomments(otherpo.target):
                otherpo.target = otherpo.target.replace('_: ' + otherpo._extract_msgidcomments()+ '\n', '')
            self.target = otherpo.target
            if self.source != otherpo.source or self.getcontext() != otherpo.getcontext():
                self.markfuzzy()
            else:
                self.markfuzzy(otherpo.isfuzzy())
        elif not otherpo.istranslated():
            if self.source != otherpo.source:
                self.markfuzzy()
        else:
            if self.target != otherpo.target:
                self.markfuzzy()

    def isheader(self):
        """Whether this unit is a header: null msgid and msgctxt, no msgid
        comments, and a msgstr that is not null."""
        #return (self._msgidlen() == 0) and (self._msgstrlen() > 0) and (len(self.msgidcomments) == 0)
        #rewritten here for performance:
        return (is_null(self.msgid)
                        and not is_null(self.msgstr)
                        and self.msgidcomments == []
                        and is_null(self.msgctxt)
        )

    def isblank(self):
        """Whether the unit has no msgid text, no msgstr text and no context.

        Headers and units carrying msgid comments are never blank.
        """
        if self.isheader() or len(self.msgidcomments):
            return False
        if (self._msgidlen() == 0) and (self._msgstrlen() == 0) and (is_null(self.msgctxt)):
            return True
        return False
        # TODO: remove:
        # Before, the equivalent of the following was the final return statement:
        # return len(self.source.strip()) == 0

    def hastypecomment(self, typecomment):
        """Check whether the given type comment is present"""
        # check for word boundaries properly by using a regular expression...
        pattern = "\\b%s\\b" % typecomment
        return sum(len(re.findall(pattern, tcline)) for tcline in self.typecomments) != 0

    def hasmarkedcomment(self, commentmarker):
        """Check whether the given comment marker is present as # (commentmarker) ..."""
        commentmarker = "(%s)" % commentmarker
        for comment in self.othercomments:
            if comment.replace("#", "", 1).strip().startswith(commentmarker):
                return True
        return False

    def settypecomment(self, typecomment, present=True):
        """Alters whether a given typecomment is present"""
        if self.hastypecomment(typecomment) != present:
            if present:
                if len(self.typecomments):
                    # There is already a comment, so we have to add onto it
                    self.typecomments[0] = "%s, %s\n" % (self.typecomments[0][:-1], typecomment)
                else:
                    self.typecomments.append("#, %s\n" % typecomment)
            else:
                # this should handle word boundaries properly ...
                pattern = "\\b%s\\b[ \t,]*" % typecomment
                stripped = [re.sub(pattern, "", tcline) for tcline in self.typecomments]
                # drop lines that are left with nothing but the "#," marker
                self.typecomments = [tcline for tcline in stripped if tcline.strip() != "#,"]

    def isfuzzy(self):
        """Whether the unit carries the "fuzzy" type comment."""
        return self.hastypecomment("fuzzy")

    def markfuzzy(self, present=True):
        """Add (or, with C{present=False}, remove) the "fuzzy" type comment."""
        self.settypecomment("fuzzy", present)

    def isobsolete(self):
        """Whether the unit is marked obsolete."""
        return self.obsolete

    def makeobsolete(self):
        """Makes this unit obsolete"""
        self.obsolete = True
        if self.msgctxt:
            self.obsoletemsgctxt = self.msgctxt
        if self.msgid:
            self.obsoletemsgid = self.msgid
            self.msgid = []
        if self.msgidcomments:
            self.obsoletemsgidcomments = self.msgidcomments
            self.msgidcomments = []
        if self.msgid_plural:
            self.obsoletemsgid_plural = self.msgid_plural
            self.msgid_plural = []
        if self.msgstr:
            self.obsoletemsgstr = self.msgstr
            self.msgstr = []
        self.sourcecomments = []
        self.automaticcomments = []

    def resurrect(self):
        """Makes an obsolete unit normal"""
        self.obsolete = False
        if self.obsoletemsgctxt:
            # the context goes back into msgctxt (not msgid)
            self.msgctxt = self.obsoletemsgctxt
            self.obsoletemsgctxt = []
        if self.obsoletemsgid:
            self.msgid = self.obsoletemsgid
            self.obsoletemsgid = []
        if self.obsoletemsgidcomments:
            self.msgidcomments = self.obsoletemsgidcomments
            self.obsoletemsgidcomments = []
        if self.obsoletemsgid_plural:
            self.msgid_plural = self.obsoletemsgid_plural
            self.obsoletemsgid_plural = []
        if self.obsoletemsgstr:
            self.msgstr = self.obsoletemsgstr
            self.obsoletemsgstr = []

    def hasplural(self):
        """returns whether this pounit contains plural strings..."""
        return len(self.msgid_plural) > 0

    def parse(self, src):
        """Parse the PO source text C{src} into this unit via C{poparser}."""
        return poparser.parse_unit(poparser.ParseState(cStringIO.StringIO(src), pounit), self)

    def _getmsgpartstr(self, partname, partlines, partcomments=""):
        """Render one message part (msgid, msgstr, ...) as PO text.

        @param partname: the keyword to emit, e.g. "msgid" or "#~ msgstr"
        @param partlines: the quoted lines of the part, or a dict of such
            lists keyed by plural index (rendered as C{partname[index]})
        @param partcomments: quoted msgid comment lines to emit with the part
        @return: the text of the part, each line ending in a newline
        """
        if isinstance(partlines, dict):
            partkeys = sorted(partlines.keys())
            return "".join([self._getmsgpartstr("%s[%d]" % (partname, partkey), partlines[partkey], partcomments) for partkey in partkeys])
        partstr = partname + " "
        partstartline = 0
        if len(partlines) > 0 and len(partcomments) == 0:
            partstr += partlines[0]
            partstartline = 1
        elif len(partcomments) > 0:
            if len(partlines) > 0 and len(unquotefrompo(partlines[:1])) == 0:
                # if there is a blank leader line, it must come before the comment
                partstr += partlines[0] + '\n'
                # but if the whole string is blank, leave it in
                if len(partlines) > 1:
                    partstartline += 1
            else:
                # All partcomments should start on a newline
                partstr += '""\n'
            # combine comments into one if more than one
            if len(partcomments) > 1:
                combinedcomment = []
                for comment in partcomments:
                    comment = unquotefrompo([comment])
                    if comment.startswith("_:"):
                        comment = comment[len("_:"):]
                    if comment.endswith("\\n"):
                        comment = comment[:-len("\\n")]
                    #Before we used to strip. Necessary in some cases?
                    combinedcomment.append(comment)
                partcomments = quoteforpo("_:%s" % "".join(combinedcomment))
            # comments first, no blank leader line needed
            partstr += "\n".join(partcomments)
            partstr = quote.rstripeol(partstr)
        else:
            partstr += '""'
        partstr += '\n'
        # add the rest
        for partline in partlines[partstartline:]:
            partstr += partline + '\n'
        return partstr

    def _encodeifneccessary(self, output):
        """encodes unicode strings and returns other strings unchanged"""
        if isinstance(output, unicode):
            # NOTE(review): this reads the attribute "encoding", while
            # __init__ stores the codec in "_encoding"; unless a base class
            # defines "encoding" the UTF-8 fallback is always used - confirm.
            encoding = encodingToUse(getattr(self, "encoding", "UTF-8"))
            return output.encode(encoding)
        return output

    def __str__(self):
        """convert to a string. double check that unicode is handled somehow here"""
        output = self._getoutput()
        return self._encodeifneccessary(output)

    def _getoutput(self):
        """return this po element as a string"""
        def add_prev_msgid_lines(lines, header, var):
            """Emit the "#|" lines for one previous-message part."""
            if len(var) > 0:
                lines.append("#| %s %s\n" % (header, var[0]))
                lines.extend("#| %s\n" % line for line in var[1:])

        def add_prev_msgid_info(lines):
            """Emit the "#|" lines for all previous-message parts."""
            add_prev_msgid_lines(lines, 'msgctxt', self.prev_msgctxt)
            add_prev_msgid_lines(lines, 'msgid', self.prev_msgid)
            add_prev_msgid_lines(lines, 'msgid_plural', self.prev_msgid_plural)

        lines = []
        lines.extend(self.othercomments)
        if self.isobsolete():
            lines.extend(self.typecomments)
            obsoletelines = []
            if self.obsoletemsgctxt:
                obsoletelines.append(self._getmsgpartstr("#~ msgctxt", self.obsoletemsgctxt))
            obsoletelines.append(self._getmsgpartstr("#~ msgid", self.obsoletemsgid, self.obsoletemsgidcomments))
            if self.obsoletemsgid_plural or self.obsoletemsgid_pluralcomments:
                obsoletelines.append(self._getmsgpartstr("#~ msgid_plural", self.obsoletemsgid_plural, self.obsoletemsgid_pluralcomments))
            obsoletelines.append(self._getmsgpartstr("#~ msgstr", self.obsoletemsgstr))
            for index, obsoleteline in enumerate(obsoletelines):
                # We need to account for a multiline msgid or msgstr here
                obsoletelines[index] = obsoleteline.replace('\n"', '\n#~ "')
            lines.extend(obsoletelines)
            lines = [self._encodeifneccessary(line) for line in lines]
            return "".join(lines)
        # if there's no msgid don't do msgid and string, unless we're the header
        # this will also discard any comments other than plain othercomments...
        if is_null(self.msgid):
            if not (self.isheader() or self.getcontext() or self.sourcecomments):
                return "".join(lines)
        lines.extend(self.automaticcomments)
        lines.extend(self.sourcecomments)
        lines.extend(self.typecomments)
        add_prev_msgid_info(lines)
        if self.msgctxt:
            lines.append(self._getmsgpartstr("msgctxt", self.msgctxt))
        lines.append(self._getmsgpartstr("msgid", self.msgid, self.msgidcomments))
        if self.msgid_plural or self.msgid_pluralcomments:
            lines.append(self._getmsgpartstr("msgid_plural", self.msgid_plural, self.msgid_pluralcomments))
        lines.append(self._getmsgpartstr("msgstr", self.msgstr))
        lines = [self._encodeifneccessary(line) for line in lines]
        postr = "".join(lines)
        return postr

    def getlocations(self):
        """Get a list of locations from sourcecomments in the PO unit

        rtype: List
        return: A list of the locations with '#: ' stripped

        """
        locations = []
        for sourcecomment in self.sourcecomments:
            locations += quote.rstripeol(sourcecomment)[3:].split()
        return locations

    def addlocation(self, location):
        """Add a location to sourcecomments in the PO unit

        @param location: Text location e.g. 'file.c:23' does not include #:
        @type location: String

        """
        self.sourcecomments.append("#: %s\n" % location)

    def _extract_msgidcomments(self, text=None):
        """Extract KDE style msgid comments from the unit.

        @param text: text to extract from; when empty or None the unit's
            own msgidcomments are used
        @rtype: String
        @return: Returns the extracted msgidcomments found in this unit's msgid.
        """

        if not text:
            text = unquotefrompo(self.msgidcomments)
        # only the first line counts, with its "_: " marker removed
        return text.split('\n')[0].replace('_: ', '', 1)

    def setmsgidcomment(self, msgidcomment):
        """Replace the msgid comments by one KDE style comment, or clear
        them when C{msgidcomment} is empty."""
        if msgidcomment:
            self.msgidcomments = ['"_: %s\\n"' % msgidcomment]
        else:
            self.msgidcomments = []

    msgidcomment = property(_extract_msgidcomments, setmsgidcomment)

    def getcontext(self):
        """Get the message context."""
        return unquotefrompo(self.msgctxt) + self._extract_msgidcomments()

    def getid(self):
        """Returns a unique identifier for this unit."""
        context = self.getcontext()
        # Gettext does not consider the plural to determine duplicates, only
        # the msgid. For generation of .mo files, we might want to use this
        # code to generate the entry for the hash table, but for now, it is
        # commented out for conformance to gettext.
#        id = '\0'.join(self.source.strings)
        unitid = self.source
        if self.msgidcomments:
            unitid = u"_: %s\n%s" % (context, unitid)
        elif context:
            unitid = u"%s\04%s" % (context, unitid)
        return unitid
685
class pofile(pocommon.pofile):
    """A .po file containing various units"""
    UnitClass = pounit

    def parse(self, input):
        """Parses the given file or file source string.

        @param input: a file-like object or a string holding PO source
        @raise base.ParseError: wrapping any exception raised while parsing
        """
        try:
            if hasattr(input, 'name'):
                self.filename = input.name
            elif not getattr(self, 'filename', ''):
                self.filename = ''
            if isinstance(input, str):
                input = cStringIO.StringIO(input)
            # clear units to get rid of automatically generated headers before parsing
            self.units = []
            poparser.parse_units(poparser.ParseState(input, pounit), self)
        except Exception as e:
            raise base.ParseError(e)

    def removeduplicates(self, duplicatestyle="merge"):
        """Make sure each msgid is unique ; merge comments etc from duplicates into original

        @param duplicatestyle: "merge" merges a duplicate into the first
            unit with the same id; "msgctxt" keeps both and adds their
            locations as msgctxt to tell them apart
        """
        # TODO: can we handle consecutive calls to removeduplicates()? What
        # about files already containing msgctxt? - test
        id_dict = {}
        uniqueunits = []
        # TODO: this is using a list as the pos aren't hashable, but this is slow.
        # probably not used frequently enough to worry about it, though.
        markedpos = []

        def addcomment(thepo):
            """Tag a unit with its locations as a KDE style msgid comment."""
            thepo.msgidcomments.append('"_: %s\\n"' % " ".join(thepo.getlocations()))
            markedpos.append(thepo)

        for thepo in self.units:
            unitid = thepo.getid()
            if thepo.isheader() and not thepo.getlocations():
                # header msgids shouldn't be merged...
                uniqueunits.append(thepo)
            elif unitid in id_dict:
                if duplicatestyle == "merge":
                    if unitid:
                        id_dict[unitid].merge(thepo)
                    else:
                        addcomment(thepo)
                        uniqueunits.append(thepo)
                elif duplicatestyle == "msgctxt":
                    origpo = id_dict[unitid]
                    if origpo not in markedpos:
                        origpo.msgctxt.append('"%s"' % escapeforpo(" ".join(origpo.getlocations())))
                        # remember the unit that was just tagged, so that a
                        # further duplicate does not tag it a second time
                        markedpos.append(origpo)
                    thepo.msgctxt.append('"%s"' % escapeforpo(" ".join(thepo.getlocations())))
                    uniqueunits.append(thepo)
            else:
                if not unitid:
                    if duplicatestyle == "merge":
                        addcomment(thepo)
                    else:
                        thepo.msgctxt.append('"%s"' % escapeforpo(" ".join(thepo.getlocations())))
                id_dict[unitid] = thepo
                uniqueunits.append(thepo)
        self.units = uniqueunits

    def __str__(self):
        """Convert to a string. double check that unicode is handled somehow here"""
        output = self._getoutput()
        if isinstance(output, unicode):
            return output.encode(getattr(self, "encoding", "UTF-8"))
        return output

    def _getoutput(self):
        """convert the units back to lines"""
        lines = []
        for unit in self.units:
            unitsrc = str(unit) + "\n"
            lines.append(unitsrc)
        lines = "".join(self.encode(lines)).rstrip()
        #After the last pounit we will have \n\n and we only want to end in \n:
        if lines:
            lines += "\n"
        return lines

    def encode(self, lines):
        """encode any unicode strings in lines in self._encoding

        UTF-8 is used when self._encoding is None or the placeholder
        "charset" (compared case-insensitively).
        """
        newlines = []
        encoding = self._encoding
        if encoding is None or encoding.lower() == "charset":
            encoding = 'UTF-8'
        for line in lines:
            if isinstance(line, unicode):
                line = line.encode(encoding)
            newlines.append(line)
        return newlines

    def decode(self, lines):
        """decode any non-unicode strings in lines with self._encoding

        Lines are passed through untouched when self._encoding is None or
        the placeholder "charset".

        @raise UnicodeError: if a line cannot be decoded; the message names
            the encoding and the offending line
        """
        newlines = []
        for line in lines:
            if isinstance(line, str) and self._encoding is not None and self._encoding.lower() != "charset":
                try:
                    line = line.decode(self._encoding)
                except UnicodeError as e:
                    raise UnicodeError("Error decoding line with encoding %r: %s.  Line is %r" % (self._encoding, e, line))
            newlines.append(line)
        return newlines

    def unit_iter(self):
        """Iterate over the units that are neither headers nor obsolete."""
        for unit in self.units:
            if not (unit.isheader() or unit.isobsolete()):
                yield unit
793