1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """Classes that hold units of .properties, and similar, files that are used in
23 translating Java, Mozilla, MacOS and other software.
24
25 The L{propfile} class is a monolingual class with L{propunit} providing unit
26 level access.
27
28 The .properties store has become a general key value pair class with
29 L{Dialect} providing the ability to change the behaviour of the parsing
30 and handling of the various dialects.
31
32 Currently we support::
33 * Java .properties
34 * Mozilla .properties
35 * Adobe Flex files
36 * MacOS X .strings files
37 * Skype .lang files
38
39
40 Dialects
41 ========
42 The following provides references and descriptions of the various dialects supported::
43
44 Java
45 ----
46 Java .properties are supported completely except for the ability to drop
47 pairs that are not translated.
48
49 The following U{.properties file
50 description<http://java.sun.com/j2se/1.4.2/docs/api/java/util/Properties.html#load(java.io.InputStream)>}
51 and U{example <http://www.exampledepot.com/egs/java.util/Props.html>} give
52 some good references to the .properties specification.
53
54 Properties files may also hold Java
55 U{MessageFormat<http://java.sun.com/j2se/1.4.2/docs/api/java/text/MessageFormat.html>}
56 messages. No special handling is provided in this storage class for
57 MessageFormat, but this may be implemented in future.
58
59 All delimiter types, comments, line continuations and spaces handling in
60 delimiters are supported.
61
62 Mozilla
63 -------
64 Mozilla files use '=' as a delimiter, are UTF-8 encoded and thus don't need \\u
65 escaping. Any \\U values will be converted to correct Unicode characters.
66
67 Strings
68 -------
69 Mac OS X strings files are implemented using
70 U{these<http://developer.apple.com/mac/library/documentation/MacOSX/Conceptual/BPInternational/Articles/StringsFiles.html>}
71 U{two<http://developer.apple.com/mac/library/documentation/Cocoa/Conceptual/LoadingResources/Strings/Strings.html>}
72 articles as references.
73
74 Flex
75 ----
76 Adobe Flex files seem to be normal .properties files but in UTF-8 just like
77 Mozilla files. This
78 U{page<http://livedocs.adobe.com/flex/3/html/help.html?content=l10n_3.html>}
79 provides the information used to implement the dialect.
80
81 Skype
82 -----
83 Skype .lang files seem to be UTF-16 encoded .properties files.
84
85 Implementation
86 ==============
87
88 A simple summary of what is permissible follows.
89
90 Comments supported::
91 # a comment
92 ! a comment
93 // a comment (only at the beginning of a line)
94 /* a comment (not across multiple lines) */
95
96 Name and Value pairs::
97 # Delimiters
98 key = value
99 key : value
100 key value
101
102 # Space in key and around value
103 \ key\ = \ value
104
105 # Note that the b and c are escaped for epydoc rendering
106 b = a string with escape sequences \\t \\n \\r \\\\ \\" \\' \\ (space) \u0123
107 c = a string with a continuation line \\
108 continuation line
109
110 # Special cases
111 # key with no value
112 key
113 # value no key (extractable in prop2po but not mergeable in po2prop)
114 =value
115
116 # .strings specific
117 "key" = "value";
118 '"
119 """
120
121 import re
122 import warnings
123 import logging
124
125 from translate.lang import data
126 from translate.misc import quote
127 from translate.misc.typecheck import accepts, returns, IsOneOf
128 from translate.storage import base
129
130
131
132
# Line terminator constant; nothing in the code visible here references it.
eol = "\n"
134
135
136 @accepts(unicode, [unicode])
137 @returns(IsOneOf(type(None), unicode), int)
138 -def _find_delimiter(line, delimiters):
139 """Find the type and position of the delimiter in a property line.
140
141 Property files can be delimeted by "=", ":" or whitespace (space for now).
142 We find the position of each delimiter, then find the one that appears
143 first.
144
145 @param line: A properties line
146 @type line: str
147 @param delimiters: valid delimiters
148 @type delimiters: list
149 @return: delimiter character and offset within L{line}
150 @rtype: Tuple (delimiter char, Offset Integer)
151 """
152 delimiter_dict = {}
153 for delimiter in delimiters:
154 delimiter_dict[delimiter] = -1
155 delimiters = delimiter_dict
156
157 for delimiter, pos in delimiters.iteritems():
158 prewhitespace = len(line) - len(line.lstrip())
159 pos = line.find(delimiter, prewhitespace)
160 while pos != -1:
161 if delimiters[delimiter] == -1 and line[pos-1] != u"\\":
162 delimiters[delimiter] = pos
163 break
164 pos = line.find(delimiter, pos + 1)
165
166 mindelimiter = None
167 minpos = -1
168 for delimiter, pos in delimiters.iteritems():
169 if pos == -1 or delimiter == u" ":
170 continue
171 if minpos == -1 or pos < minpos:
172 minpos = pos
173 mindelimiter = delimiter
174 if mindelimiter is None and delimiters.get(u" ", -1) != -1:
175
176 return (u" ", delimiters[" "])
177 if mindelimiter is not None and u" " in delimiters and delimiters[u" "] < delimiters[mindelimiter]:
178
179
180
181 if len(line[delimiters[u" "]:delimiters[mindelimiter]].strip()) > 0:
182 return (u" ", delimiters[u" "])
183 return (mindelimiter, minpos)
184
187 """Misspelled name that is kept around in case someone relies on it.
188
189 Deprecated."""
190 warnings.warn("deprecated use Dialect.find_delimiter instead", DeprecationWarning)
191 return _find_delimiter(line, DialectJava.delimiters)
192
197 """Determine whether L{line} has a line continuation marker.
198
199 .properties files can be terminated with a backslash (\\) indicating
200 that the 'value' continues on the next line. Continuation is only
201 valid if there are an odd number of backslashes (an even number
202 would result in a set of N/2 slashes not an escape)
203
204 @param line: A properties line
205 @type line: str
206 @return: Does L{line} end with a line continuation
207 @rtype: Boolean
208 """
209 pos = -1
210 count = 0
211 if len(line) == 0:
212 return False
213
214
215 while len(line) >= -pos and line[pos:][0] == "\\":
216 pos -= 1
217 count += 1
218 return (count % 2) == 1
219
220
221 @accepts(unicode)
222 @returns(unicode)
223 -def _key_strip(key):
224 """Cleanup whitespace found around a key
225
226 @param key: A properties key
227 @type key: str
228 @return: Key without any uneeded whitespace
229 @rtype: str
230 """
231 newkey = key.rstrip()
232
233 if newkey[-1:] == "\\":
234 newkey += key[len(newkey):len(newkey)+1]
235 return newkey.lstrip()
236
# Module-level mapping of dialects; starts empty.
# NOTE(review): presumably filled by the register_dialect(...) calls further
# down and read by get_dialect() -- neither definition is visible here.
dialects = {}
# Name of the dialect treated as the default ("java").
default_dialect = "java"
243
247
281
287 register_dialect(DialectJava)
293 register_dialect(DialectFlex)
304 register_dialect(DialectMozilla)
315 register_dialect(DialectSkype)
319 name = "strings"
320 default_encoding = "utf-16"
321 delimiters = [u"="]
322 pair_terminator = u";"
323 key_wrap_char = u'"'
324 value_wrap_char = u'"'
325 drop_comments = ["/* No comment provided by engineer. */"]
326
328 """Strip unneeded characters from the key"""
329 newkey = key.rstrip().rstrip('"')
330
331 if newkey[-1:] == "\\":
332 newkey += key[len(newkey):len(newkey)+1]
333 return newkey.lstrip().lstrip('"')
334 key_strip = classmethod(key_strip)
335
337 """Strip unneeded characters from the value"""
338 newvalue = value.rstrip().rstrip(';').rstrip('"')
339
340 if newvalue[-1:] == "\\":
341 newvalue += value[len(newvalue):len(newvalue)+1]
342 return newvalue.lstrip().lstrip('"')
343 value_strip = classmethod(value_strip)
344
345 - def encode(cls, string, encoding=None):
347 encode = classmethod(encode)
348 register_dialect(DialectStrings)
349
350
351 -class propunit(base.TranslationUnit):
352 """an element of a properties file i.e. a name and value, and any comments
353 associated"""
354
355 - def __init__(self, source="", personality="java"):
365
370
374
375 source = property(getsource, setsource)
376
381
383 translation = quote.propertiesdecode(self.translation)
384 translation = re.sub(u"\\\\ ", u" ", translation)
385 return translation
386
387 target = property(gettarget, settarget)
388
394 encoding = property(_get_encoding)
395
402
404 """convert the element back into formatted lines for a .properties
405 file"""
406 notes = self.getnotes()
407 if notes:
408 notes += u"\n"
409 if self.isblank():
410 return notes + u"\n"
411 else:
412 self.value = self.personality.encode(self.source, self.encoding)
413 self.translation = self.personality.encode(self.target, self.encoding)
414 value = self.translation or self.value
415 return u"%(notes)s%(key)s%(del)s%(value)s\n" % {"notes": notes,
416 "key": self.name,
417 "del": self.delimiter,
418 "value": value}
419
422
def addnote(self, text, origin=None, position="append"):
    """Attach a note to this unit.

    Notes whose origin is 'programmer', 'developer', 'source code' or
    None are converted with data.forceunicode() and appended to
    self.comments.  Any other origin is delegated to the base class
    implementation, whose result is returned.
    """
    if origin not in ['programmer', 'developer', 'source code', None]:
        return super(propunit, self).addnote(text, origin=origin,
                                             position=position)
    self.comments.append(data.forceunicode(text))
430
432 if origin in ['programmer', 'developer', 'source code', None]:
433 return u'\n'.join(self.comments)
434 else:
435 return super(propunit, self).getnotes(origin)
436
439
441 """returns whether this is a blank element, containing only
442 comments."""
443 return not (self.name or self.value)
444
446 return bool(self.name)
447
450
453
454
455 -class propfile(base.TranslationStore):
456 """this class represents a .properties file, made up of propunits"""
457 UnitClass = propunit
458
def __init__(self, inputfile=None, personality="java", encoding=None):
    """Construct a propfile, optionally reading in from inputfile.

    @param inputfile: object with read() and close() methods to load the
    store from, or None to create an empty store.  It is closed once it
    has been read.
    @param personality: name of the dialect, resolved with get_dialect()
    @param encoding: encoding to use; when empty or None the dialect's
    default_encoding is used instead
    """
    super(propfile, self).__init__(unitclass=self.UnitClass)
    self.personality = get_dialect(personality)
    self.encoding = encoding or self.personality.default_encoding
    self.filename = getattr(inputfile, 'name', '')
    if inputfile is not None:
        # Close the file even when reading fails so the handle never leaks.
        try:
            propsrc = inputfile.read()
        finally:
            inputfile.close()
        self.parse(propsrc)
469
def parse(self, propsrc):
    """Read the source of a properties file and add its entries as units.

    The source is decoded with detect_encoding() (trying the dialect's
    default encoding, then utf-8 and utf-16) and self.encoding is set to
    the encoding that was detected.  The text is then processed line by
    line:

      - while a value is being continued, the line (minus leading
        whitespace) is appended to that value;
      - lines starting with "#", "!", "/*" or "//", or ending with "*/",
        are stored as comments of the unit being built, unless listed in
        the dialect's drop_comments;
      - a blank line closes the unit being built, if it has any content;
      - anything else is split into name and value at the delimiter
        reported by the dialect.

    @param propsrc: the raw contents of the properties file
    """
    text, encoding = self.detect_encoding(
        propsrc,
        default_encodings=[self.personality.default_encoding,
                           'utf-8', 'utf-16'])
    self.encoding = encoding
    propsrc = text

    newunit = propunit("", self.personality.name)
    inmultilinevalue = False

    for line in propsrc.split(u"\n"):
        line = quote.rstripeol(line)
        stripped = line.strip()
        if inmultilinevalue:
            # Continuation of the previous line's value.
            newunit.value += line.lstrip()
            # The accumulated value may itself end in a continuation.
            inmultilinevalue = is_line_continuation(newunit.value)
            if inmultilinevalue:
                # Drop the trailing backslash; more lines follow.
                newunit.value = newunit.value[:-1]
            else:
                # The value is complete.
                self.addunit(newunit)
                newunit = propunit("", self.personality.name)
        elif (stripped[:1] in (u'#', u'!') or
              stripped[:2] in (u"/*", u"//") or
              # The last two characters must be compared here: the previous
              # "[:-2]" slice dropped them instead, so a line closing a
              # "/* ... */" comment was never recognised.
              stripped[-2:] == u"*/"):
            # Comment line: keep it unless the dialect wants it dropped.
            if line not in self.personality.drop_comments:
                newunit.comments.append(line)
        elif not stripped:
            # Blank line: flush the unit being built if it holds anything.
            if str(newunit).strip():
                self.addunit(newunit)
                newunit = propunit("", self.personality.name)
        else:
            newunit.delimiter, delimiter_pos = \
                self.personality.find_delimiter(line)
            if delimiter_pos == -1:
                # No delimiter: the whole line is a key without a value.
                newunit.name = self.personality.key_strip(line)
                newunit.value = u""
                self.addunit(newunit)
                newunit = propunit("", self.personality.name)
            else:
                newunit.name = self.personality.key_strip(
                    line[:delimiter_pos])
                rest = line[delimiter_pos+1:]
                if is_line_continuation(rest.lstrip()):
                    # Value carries on to the next line: store what we
                    # have so far without the trailing backslash.
                    inmultilinevalue = True
                    newunit.value = rest.lstrip()[:-1]
                else:
                    newunit.value = self.personality.value_strip(rest)
                    self.addunit(newunit)
                    newunit = propunit("", self.personality.name)
    # Keep a trailing unit that is an unterminated multi-line value or
    # that only holds comments.
    if inmultilinevalue or len(newunit.comments) > 0:
        self.addunit(newunit)
525
527 """convert the units back to lines"""
528 lines = []
529 for unit in self.units:
530 lines.append(str(unit))
531 return "".join(lines)
532
535 Name = _("Java Properties")
536 Exensions = ['properties']
537
539 kwargs['personality'] = "java"
540 kwargs['encoding'] = "auto"
541 super(javafile, self).__init__(*args, **kwargs)
542
551