1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 """Classes for the support of Gettext .po and .pot files.
22
23 This implementation assumes that cpo is working. This should not be used
24 directly, but can be used once cpo has been established to work."""
25
26
27
28
29
30
31 import re
32 import copy
33 import cStringIO
34 import urllib
35
36 from translate.lang import data
37 from translate.misc.multistring import multistring
38 from translate.storage import pocommon, base, cpo, poparser
39 from translate.storage.pocommon import encodingToUse
40
41 lsep = " "
42 """Separator for #: entries"""
43
# Text of a bare PO header entry: an empty msgid whose msgstr carries only the
# Content-Type (charset=UTF-8) and Content-Transfer-Encoding (8bit) fields.
# It is a raw string, so each "\n" below stays a literal backslash + "n"
# inside the quoted PO lines rather than becoming a real newline.
basic_header = r'''msgid ""
msgstr ""
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
'''
49
50
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66 __shallow__ = ['_store']
67
68 - def __init__(self, source=None, encoding="UTF-8"):
75
84
87
100 source = property(getsource, setsource)
101
103 """Returns the unescaped msgstr"""
104 return self._target
105
107 """Sets the msgstr to the given (unescaped) value"""
108 self._rich_target = None
109
110
111 if self.hasplural():
112 if isinstance(target, multistring):
113 self._target = target
114 else:
115
116 self._target = multistring(target)
117 elif isinstance(target, (dict, list)):
118 if len(target) == 1:
119 self._target = target[0]
120 else:
121 raise ValueError("po msgid element has no plural but msgstr has %d elements (%s)" % (len(target), target))
122 else:
123 self._target = target
124 target = property(gettarget, settarget)
125
127 """Return comments based on origin value (programmer, developer, source code and translator)"""
128 if origin == None:
129 comments = u"\n".join(self.othercomments)
130 comments += u"\n".join(self.automaticcomments)
131 elif origin == "translator":
132 comments = u"\n".join(self.othercomments)
133 elif origin in ["programmer", "developer", "source code"]:
134 comments = u"\n".join(self.automaticcomments)
135 else:
136 raise ValueError("Comment type not valid")
137 return comments
138
def addnote(self, text, origin=None, position="append"):
    """Attach a note to this unit.

    This is modeled on the XLIFF method. See xliff.py::xliffunit.addnote

    @param text: The note text. Empty or whitespace-only text is ignored.
        A single trailing newline is dropped and the remainder is split
        into one comment entry per line.
    @param origin: "programmer", "developer" or "source code" store the note
        in automaticcomments; any other value stores it in othercomments.
    @param position: "append" puts the new lines after the existing ones,
        "prepend" puts them before; any other value makes the new lines
        replace the existing ones.
    """
    # Nothing worth storing in an empty or whitespace-only note.
    if not text or not text.strip():
        return
    text = data.forceunicode(text)

    is_automatic = origin in ["programmer", "developer", "source code"]
    existing = self.automaticcomments if is_automatic else self.othercomments

    # Drop one trailing newline so the split does not yield an empty last entry.
    if text.endswith(u'\n'):
        text = text[:-1]
    lines = text.split(u"\n")

    if position == "append":
        lines = existing + lines
    elif position == "prepend":
        lines = lines + existing
    # Any other position leaves `lines` as is, i.e. the old notes are replaced.

    if is_automatic:
        self.automaticcomments = lines
    else:
        self.othercomments = lines
162
164 """Remove all the translator's notes (other comments)"""
165 self.othercomments = []
166
168
169 new_unit = self.__class__()
170
171
172 shallow = set(self.__shallow__)
173
174 for key, value in self.__dict__.iteritems():
175 if key not in shallow:
176 setattr(new_unit, key, copy.deepcopy(value))
177
178 for key in set(shallow):
179 setattr(new_unit, key, getattr(self, key))
180
181
182 memo[id(self)] = self
183
184 return new_unit
185
187 return copy.deepcopy(self)
188
194
200
def merge(self, otherpo, overwrite=False, comments=True, authoritative=False):
    """Merges the otherpo (with the same msgid) into this one.

    Overwrite non-blank self.msgstr only if overwrite is True
    merge comments only if comments is True

    @param otherpo: The unit to merge from. If it is not a pounit, the merge
        is delegated to the superclass implementation.
    @param overwrite: Take otherpo's target even if this unit is already
        translated.
    @param comments: Merge othercomments and typecomments (and, unless
        authoritative, automaticcomments and sourcecomments) from otherpo.
    @param authoritative: When True, otherpo's automaticcomments and
        sourcecomments are not merged in.

    Fuzzy handling: when the target is taken from otherpo, this unit is
    marked fuzzy if source or context differ, otherwise it takes over
    otherpo's fuzzy state. When the target is kept, the unit is marked fuzzy
    if otherpo is untranslated and the sources differ, or if otherpo is
    translated and the targets differ.

    Note that otherpo itself may be modified: its comment lists can be
    decoded to unicode in place, and a "_: <msgid comment>\\n" marker found
    in its target is removed before the target is copied.
    """

    def mergelists(list1, list2, split=False):
        """Merge the items of list2 into list1, modifying list1 in place.

        With split=True every item is split on whitespace and only the
        pieces of list2 that are not already pieces of list1 are added.
        """
        # Decode byte strings when either list holds unicode, so that the
        # membership tests below compare like with like.
        if unicode in [type(item) for item in list2] + [type(item) for item in list1]:
            for position, item in enumerate(list1):
                if isinstance(item, str):
                    list1[position] = item.decode("utf-8")
            for position, item in enumerate(list2):
                if isinstance(item, str):
                    list2[position] = item.decode("utf-8")

        # Determine the newline style of list2 from its first item. There is
        # no break: the last matching candidate wins, so the two-character
        # endings override a one-character match. "\r\n" is included so that
        # CRLF-terminated items are recognised as such instead of as "\n".
        lineend = ""
        if list2 and list2[0]:
            for candidate in ["\n", "\r", "\n\r", "\r\n"]:
                if list2[0].endswith(candidate):
                    lineend = candidate

        if split:
            splitlist1 = []
            splitlist2 = []
            for item in list1:
                splitlist1.extend(item.split())
            for item in list2:
                splitlist2.extend(item.split())
            list1.extend([item for item in splitlist2 if item not in splitlist1])
        else:
            # Normal merge: strip the detected line-ending characters from
            # each incoming item before comparing and appending.
            if list1 != list2:
                for item in list2:
                    item = item.rstrip(lineend)
                    # Skip items already present, except that items shorter
                    # than 5 characters are always appended.
                    # NOTE(review): reason for the length exemption is not
                    # visible here -- confirm before changing.
                    if item not in list1 or len(item) < 5:
                        list1.append(item)

    if not isinstance(otherpo, pounit):
        super(pounit, self).merge(otherpo, overwrite, comments)
        return
    if comments:
        mergelists(self.othercomments, otherpo.othercomments)
        mergelists(self.typecomments, otherpo.typecomments)
        if not authoritative:
            # Only a non-authoritative merge brings across otherpo's
            # automatic comments and source locations.
            mergelists(self.automaticcomments, otherpo.automaticcomments)
            mergelists(self.sourcecomments, otherpo.sourcecomments, split=True)
    if not self.istranslated() or overwrite:
        # Remove an embedded "_: <comment>\n" msgid comment from the
        # translation (if any) before copying it.
        if pocommon.extract_msgid_comment(otherpo.target):
            otherpo.target = otherpo.target.replace('_: ' + otherpo._extract_msgidcomments() + '\n', '')
        self.target = otherpo.target
        if self.source != otherpo.source or self.getcontext() != otherpo.getcontext():
            self.markfuzzy()
        else:
            self.markfuzzy(otherpo.isfuzzy())
    elif not otherpo.istranslated():
        if self.source != otherpo.source:
            self.markfuzzy()
    else:
        if self.target != otherpo.target:
            self.markfuzzy()
272
274
275 return not self.getid() and len(self.target) > 0
276
283
288
297
307
310
313
316
318 """Makes this unit obsolete"""
319 self.sourcecomments = []
320 self.automaticcomments = []
321 super(pounit, self).makeobsolete()
322
327
331
333 """convert to a string. double check that unicode is handled somehow here"""
334 _cpo_unit = cpo.pounit.buildfromunit(self)
335 return str(_cpo_unit)
336
338 """Get a list of locations from sourcecomments in the PO unit
339
340 @rtype: list
341 @return: A list of the locations with '#: ' stripped
342
343 """
344
345 return [urllib.unquote_plus(loc) for loc in self.sourcecomments]
346
348 """Add a location to sourcecomments in the PO unit
349
350 @param location: Text location e.g. 'file.c:23' does not include #:
351 @type location: String
352 """
353 if location.find(" ") != -1:
354 location = urllib.quote_plus(location)
355 self.sourcecomments.extend(location.split())
356
367
def getcontext(self):
    """Return the message context: the stored msgctxt followed by the msgid comment."""
    msgctxt = self._msgctxt
    return msgctxt + self.msgidcomment
371
def setcontext(self, context):
    """Store the message context.

    @param context: The new context. A false value (None, empty string) is
        replaced by an empty unicode string; the value is passed through
        data.forceunicode before being stored in _msgctxt.
    """
    if not context:
        context = u""
    self._msgctxt = data.forceunicode(context)
375
390
423 buildfromunit = classmethod(buildfromunit)
424
425
426 -class pofile(pocommon.pofile):
427 """A .po file containing various units"""
428 UnitClass = pounit
429
431 """Deprecated: changes the encoding on the file."""
432
433
434
435 raise DeprecationWarning
436
437 self._encoding = encodingToUse(newencoding)
438 if not self.units:
439 return
440 header = self.header()
441 if not header or header.isblank():
442 return
443 charsetline = None
444 headerstr = header.target
445 for line in headerstr.split("\n"):
446 if not ":" in line:
447 continue
448 key, value = line.strip().split(":", 1)
449 if key.strip() != "Content-Type":
450 continue
451 charsetline = line
452 if charsetline is None:
453 headerstr += "Content-Type: text/plain; charset=%s" % self._encoding
454 else:
455 charset = re.search("charset=([^ ]*)", charsetline)
456 if charset is None:
457 newcharsetline = charsetline
458 if not newcharsetline.strip().endswith(";"):
459 newcharsetline += ";"
460 newcharsetline += " charset=%s" % self._encoding
461 else:
462 charset = charset.group(1)
463 newcharsetline = charsetline.replace("charset=%s" % charset, "charset=%s" % self._encoding, 1)
464 headerstr = headerstr.replace(charsetline, newcharsetline, 1)
465 header.target = headerstr
466
468 """Builds up this store from the internal cpo store.
469
470 A user must ensure that self._cpo_store already exists, and that it is
471 deleted afterwards."""
472 for unit in self._cpo_store.units:
473 self.addunit(self.UnitClass.buildfromunit(unit))
474 self._encoding = self._cpo_store._encoding
475
477 """Builds the internal cpo store from the data in self.
478
479 A user must ensure that self._cpo_store does not exist, and should
480 delete it after using it."""
481 self._cpo_store = cpo.pofile(noheader=True)
482 for unit in self.units:
483 if not unit.isblank():
484 self._cpo_store.addunit(cpo.pofile.UnitClass.buildfromunit(unit, self._encoding))
485 if not self._cpo_store.header():
486
487 self._cpo_store.makeheader(charset=self._encoding, encoding="8bit")
488
490 """Parses the given file or file source string."""
491 try:
492 if hasattr(input, 'name'):
493 self.filename = input.name
494 elif not getattr(self, 'filename', ''):
495 self.filename = ''
496 tmp_header_added = False
497
498
499
500 self.units = []
501 self._cpo_store = cpo.pofile(input, noheader=True)
502 self._build_self_from_cpo()
503 del self._cpo_store
504 if tmp_header_added:
505 self.units = self.units[1:]
506 except Exception, e:
507 raise base.ParseError(e)
508
510 """Make sure each msgid is unique ; merge comments etc from duplicates into original"""
511
512
513 id_dict = {}
514 uniqueunits = []
515
516
517 markedpos = []
518
519 def addcomment(thepo):
520 thepo.msgidcomment = " ".join(thepo.getlocations())
521 markedpos.append(thepo)
522 for thepo in self.units:
523 id = thepo.getid()
524 if thepo.isheader() and not thepo.getlocations():
525
526 uniqueunits.append(thepo)
527 elif id in id_dict:
528 if duplicatestyle == "merge":
529 if id:
530 id_dict[id].merge(thepo)
531 else:
532 addcomment(thepo)
533 uniqueunits.append(thepo)
534 elif duplicatestyle == "msgctxt":
535 origpo = id_dict[id]
536 if origpo not in markedpos:
537 origpo._msgctxt += " ".join(origpo.getlocations())
538 markedpos.append(thepo)
539 thepo._msgctxt += " ".join(thepo.getlocations())
540 uniqueunits.append(thepo)
541 else:
542 if not id:
543 if duplicatestyle == "merge":
544 addcomment(thepo)
545 else:
546 thepo._msgctxt += u" ".join(thepo.getlocations())
547 id_dict[id] = thepo
548 uniqueunits.append(thepo)
549 self.units = uniqueunits
550
552 """Convert to a string. double check that unicode is handled somehow here"""
553 self._cpo_store = cpo.pofile(encoding=self._encoding, noheader=True)
554 try:
555 self._build_cpo_from_self()
556 except UnicodeEncodeError, e:
557 self._encoding = "utf-8"
558 self.updateheader(add=True, Content_Type="text/plain; charset=UTF-8")
559 self._build_cpo_from_self()
560 output = str(self._cpo_store)
561 del self._cpo_store
562 return output
563