1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """This module stores information and functionality that relates to plurals."""
23
24 import unicodedata
25
26 from translate.storage.placeables import StringElem
27
28
# Per-language plural data, keyed by language code. Keys may carry a country
# code ("pt_BR") or a modifier ("ca@valencia"). The equations are C-style
# expressions in ``n`` and are stored as plain strings.
languages = {
    'af': (u'Afrikaans', 2, '(n != 1)'),
    'ak': (u'Akan', 2, 'n > 1'),
    'am': (u'Amharic', 2, 'n > 1'),
    'an': (u'Aragonese', 2, '(n != 1)'),
    'ar': (u'Arabic', 6, 'n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=11 ? 4 : 5'),
    'arn': (u'Mapudungun; Mapuche', 2, 'n > 1'),
    'ast': (u'Asturian; Bable; Leonese; Asturleonese', 2, '(n != 1)'),
    'az': (u'Azerbaijani', 2, '(n != 1)'),
    'be': (u'Belarusian', 3, 'n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'),
    'bg': (u'Bulgarian', 2, '(n != 1)'),
    'bn': (u'Bengali', 2, '(n != 1)'),
    'bn_IN': (u'Bengali (India)', 2, '(n != 1)'),
    'bo': (u'Tibetan', 1, '0'),
    'br': (u'Breton', 2, 'n > 1'),
    'bs': (u'Bosnian', 3, 'n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'),
    'ca': (u'Catalan; Valencian', 2, '(n != 1)'),
    'ca@valencia': (u'Catalan; Valencian (Valencia)', 2, '(n != 1)'),
    'cs': (u'Czech', 3, '(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2'),
    'csb': (u'Kashubian', 3, 'n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'),
    'cy': (u'Welsh', 2, '(n==2) ? 1 : 0'),
    'da': (u'Danish', 2, '(n != 1)'),
    'de': (u'German', 2, '(n != 1)'),
    'dz': (u'Dzongkha', 1, '0'),
    'el': (u'Greek, Modern (1453-)', 2, '(n != 1)'),
    'en': (u'English', 2, '(n != 1)'),
    'en_GB': (u'English (United Kingdom)', 2, '(n != 1)'),
    'en_ZA': (u'English (South Africa)', 2, '(n != 1)'),
    'eo': (u'Esperanto', 2, '(n != 1)'),
    'es': (u'Spanish; Castilian', 2, '(n != 1)'),
    'et': (u'Estonian', 2, '(n != 1)'),
    'eu': (u'Basque', 2, '(n != 1)'),
    'fa': (u'Persian', 1, '0'),
    'fi': (u'Finnish', 2, '(n != 1)'),
    'fil': (u'Filipino; Pilipino', 2, '(n > 1)'),
    'fo': (u'Faroese', 2, '(n != 1)'),
    'fr': (u'French', 2, '(n > 1)'),
    'fur': (u'Friulian', 2, '(n != 1)'),
    'fy': (u'Frisian', 2, '(n != 1)'),
    'ga': (u'Irish', 3, 'n==1 ? 0 : n==2 ? 1 : 2'),
    'gl': (u'Galician', 2, '(n != 1)'),
    'gu': (u'Gujarati', 2, '(n != 1)'),
    'gun': (u'Gun', 2, '(n > 1)'),
    'ha': (u'Hausa', 2, '(n != 1)'),
    'he': (u'Hebrew', 2, '(n != 1)'),
    'hi': (u'Hindi', 2, '(n != 1)'),
    'hy': (u'Armenian', 1, '0'),
    'hr': (u'Croatian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'hu': (u'Hungarian', 2, '(n != 1)'),
    'ia': (u"Interlingua (International Auxiliary Language Association)", 2, '(n != 1)'),
    'id': (u'Indonesian', 1, '0'),
    'is': (u'Icelandic', 2, '(n != 1)'),
    'it': (u'Italian', 2, '(n != 1)'),
    'ja': (u'Japanese', 1, '0'),
    'jv': (u'Javanese', 2, '(n != 1)'),
    'ka': (u'Georgian', 1, '0'),
    'kk': (u'Kazakh', 1, '0'),
    'km': (u'Central Khmer', 1, '0'),
    'kn': (u'Kannada', 2, '(n != 1)'),
    'ko': (u'Korean', 1, '0'),
    'ku': (u'Kurdish', 2, '(n != 1)'),
    'kw': (u'Cornish', 4, '(n==1) ? 0 : (n==2) ? 1 : (n == 3) ? 2 : 3'),
    'ky': (u'Kirghiz; Kyrgyz', 1, '0'),
    'lb': (u'Luxembourgish; Letzeburgesch', 2, '(n != 1)'),
    'ln': (u'Lingala', 2, '(n > 1)'),
    'lo': (u'Lao', 1, '0'),
    'lt': (u'Lithuanian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'lv': (u'Latvian', 3, '(n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2)'),
    'mg': (u'Malagasy', 2, '(n > 1)'),
    'mi': (u'Maori', 2, '(n > 1)'),
    'mk': (u'Macedonian', 2, 'n==1 || n%10==1 ? 0 : 1'),
    'ml': (u'Malayalam', 2, '(n != 1)'),
    'mn': (u'Mongolian', 2, '(n != 1)'),
    'mr': (u'Marathi', 2, '(n != 1)'),
    'ms': (u'Malay', 1, '0'),
    'mt': (u'Maltese', 4, '(n==1 ? 0 : n==0 || ( n%100>1 && n%100<11) ? 1 : (n%100>10 && n%100<20 ) ? 2 : 3)'),
    'nah': (u'Nahuatl languages', 2, '(n != 1)'),
    'nap': (u'Neapolitan', 2, '(n != 1)'),
    'nb': (u'Bokmål, Norwegian; Norwegian Bokmål', 2, '(n != 1)'),
    'ne': (u'Nepali', 2, '(n != 1)'),
    'nl': (u'Dutch; Flemish', 2, '(n != 1)'),
    'nn': (u'Norwegian Nynorsk; Nynorsk, Norwegian', 2, '(n != 1)'),
    'nso': (u'Pedi; Sepedi; Northern Sotho', 2, '(n > 1)'),
    'oc': (u'Occitan (post 1500)', 2, '(n > 1)'),
    'or': (u'Oriya', 2, '(n != 1)'),
    'pa': (u'Panjabi; Punjabi', 2, '(n != 1)'),
    'pap': (u'Papiamento', 2, '(n != 1)'),
    'pl': (u'Polish', 3, '(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'pms': (u'Piemontese', 2, '(n != 1)'),
    'ps': (u'Pushto; Pashto', 2, '(n != 1)'),
    'pt': (u'Portuguese', 2, '(n != 1)'),
    'pt_BR': (u'Portuguese (Brazil)', 2, '(n > 1)'),
    'rm': (u'Romansh', 2, '(n != 1)'),
    # NOTE(review): this is the only equation that ends in ';' -- confirm that
    # consumers building a Plural-Forms header tolerate it.
    'ro': (u'Romanian', 3, '(n==1 ? 0 : (n==0 || (n%100 > 0 && n%100 < 20)) ? 1 : 2);'),
    'ru': (u'Russian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'sco': (u'Scots', 2, '(n != 1)'),
    'si': (u'Sinhala; Sinhalese', 2, '(n != 1)'),
    'sk': (u'Slovak', 3, '(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2'),
    'sl': (u'Slovenian', 4, '(n%100==1 ? 0 : n%100==2 ? 1 : n%100==3 || n%100==4 ? 2 : 3)'),
    'so': (u'Somali', 2, '(n != 1)'),
    'sq': (u'Albanian', 2, '(n != 1)'),
    'sr': (u'Serbian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'st': (u'Sotho, Southern', 2, '(n != 1)'),
    'su': (u'Sundanese', 1, '0'),
    'sv': (u'Swedish', 2, '(n != 1)'),
    'sw': (u'Swahili', 2, '(n != 1)'),
    'ta': (u'Tamil', 2, '(n != 1)'),
    'te': (u'Telugu', 2, '(n != 1)'),
    'tg': (u'Tajik', 2, '(n != 1)'),
    'ti': (u'Tigrinya', 2, '(n > 1)'),
    'th': (u'Thai', 1, '0'),
    'tk': (u'Turkmen', 2, '(n != 1)'),
    'tr': (u'Turkish', 1, '0'),
    'tt': (u'Tatar', 1, '0'),
    'ug': (u'Uighur; Uyghur', 1, '0'),
    'uk': (u'Ukrainian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'vi': (u'Vietnamese', 1, '0'),
    'wa': (u'Walloon', 2, '(n > 1)'),

    'zh_CN': (u'Chinese (China)', 1, '0'),
    'zh_HK': (u'Chinese (Hong Kong)', 1, '0'),
    'zh_TW': (u'Chinese (Taiwan)', 1, '0'),
    'zu': (u'Zulu', 2, '(n != 1)'),
}
"""Dictionary of language data.
The language code is the dictionary key (which may contain country codes and modifiers).
The value is a tuple: (Full name in English from iso-codes, nplurals, plural equation).

Note that the English names should not be used in user facing places - it
should always be passed through the function returned from tr_lang(), or at
least passed through _fix_language_name()."""
162
163 _fixed_names = {
164 u"Asturian; Bable; Leonese; Asturleonese": u"Asturian",
165 u"Bokmål, Norwegian; Norwegian Bokmål": u"Norwegian Bokmål",
166 u"Catalan; Valencian": u"Catalan",
167 u"Central Khmer": u"Khmer",
168 u"Chichewa; Chewa; Nyanja": u"Chewa; Nyanja",
169 u"Divehi; Dhivehi; Maldivian": u"Divehi",
170 u"Dutch; Flemish": u"Dutch",
171 u"Filipino; Pilipino": u"Filipino",
172 u"Greek, Modern (1453-)": u"Greek",
173 u"Interlingua (International Auxiliary Language Association)": u"Interlingua",
174 u"Kirghiz; Kyrgyz": u"Kirghiz",
175 u"Klingon; tlhIngan-Hol": u"Klingon",
176 u"Limburgan; Limburger; Limburgish": u"Limburgish",
177 u"Low German; Low Saxon; German, Low; Saxon, Low": u"Low German",
178 u"Luxembourgish; Letzeburgesch": u"Luxembourgish",
179 u"Ndebele, South; South Ndebele": u"Southern Ndebele",
180 u"Norwegian Nynorsk; Nynorsk, Norwegian": u"Norwegian Nynorsk",
181 u"Occitan (post 1500)": u"Occitan",
182 u"Panjabi; Punjabi": u"Punjabi",
183 u"Pedi; Sepedi; Northern Sotho": u"Northern Sotho",
184 u"Pushto; Pashto": u"Pashto",
185 u"Sinhala; Sinhalese": u"Sinhala",
186 u"Sotho, Southern": u"Sotho",
187 u"Spanish; Castilian": u"Spanish",
188 u"Uighur; Uyghur": u"Uighur",
189 }
190
191
# NOTE(review): the ``def`` line was missing from this listing; the name is
# taken from the call in simplify_to_common() and the parameter from the body.
def simplercode(code):
    """This attempts to simplify the given language code by ignoring country
    codes, for example.

    @see:
      - U{http://www.rfc-editor.org/rfc/bcp/bcp47.txt}
      - U{http://www.rfc-editor.org/rfc/rfc4646.txt}
      - U{http://www.rfc-editor.org/rfc/rfc4647.txt}
      - U{http://www.w3.org/International/articles/language-tags/}

    @param code: A language code; may be None or empty.
    @return: C{code} with its last subtag removed, the empty string if there
        is nothing left to strip, or C{code} itself when it is None/empty.
    """
    if not code:
        return code

    normalized = normalize_code(code)
    separator = normalized.rfind('-')
    if separator < 0:
        return ""
    # The index found in the normalized form is applied to the original
    # string -- assumes normalize_code() preserves length; TODO confirm.
    return code[:separator]
211
212
expansion_factors = {
    'af': 0.1,
    'ar': -0.09,
    'es': 0.21,
    'fr': 0.28,
    'it': 0.2,
}
"""Source to target string length expansion factors."""
221
222 import gettext
223 import locale
224 import re
225 import os
226
# Module-level caches, filled on demand by gettext_lang() and
# gettext_country(); keyed by language code.
iso639 = {}
"""ISO 639 language codes"""
iso3166 = {}
"""ISO 3166 country codes"""

# Case-sensitive code: lowercase language, optional uppercase region,
# optional "@modifier".
langcode_re = re.compile("^[a-z]{2,3}([_-][A-Z]{2,3}|)(@[a-zA-Z0-9]+|)$")
# Same shape, matched case-insensitively.
langcode_ire = re.compile("^[a-z]{2,3}([_-][a-z]{2,3})?(@[a-z0-9]+)?$", re.IGNORECASE)
# Only the part that may follow the language: a region plus optional modifier.
variant_re = re.compile("^[_-][A-Z]{2,3}(@[a-zA-Z0-9]+|)$")
235
236
# NOTE(review): the ``def`` line was missing from this listing. The parameter
# names come from the body; the function name does not appear anywhere in the
# visible source -- confirm it against callers.
def languagematch(languagecode, otherlanguagecode):
    """matches a languagecode to another, ignoring regions in the second

    @param languagecode: The code to match against, or None to only check
        that C{otherlanguagecode} looks like a language code.
    @param otherlanguagecode: The candidate code, possibly carrying a region
        and/or modifier.
    @return: A true value on a match and a false one otherwise (a bool or a
        regex match object/None, not necessarily a strict bool).
    """
    if languagecode is None:
        return langcode_re.match(otherlanguagecode)
    if languagecode == otherlanguagecode:
        return True
    # Otherwise the remainder after the shared prefix must be a region
    # (plus optional modifier), e.g. "pt" matches "pt_BR".
    return (otherlanguagecode.startswith(languagecode) and
            variant_re.match(otherlanguagecode[len(languagecode):]))
243
# Splits names of the form "Language (Country)"; the parenthesised part may
# not contain digits and is at most 25 characters long.
dialect_name_re = re.compile(r"(.+)\s\(([^)\d]{,25})\)$")
245
246
247
248
# NOTE(review): the ``def`` line was missing from this listing; the name is
# taken from the ``languages`` docstring and the ``langcode=None`` default is
# inferred from the docstring below and from gettext_lang() -- confirm.
def tr_lang(langcode=None):
    """Gives a function that can translate a language name, even in the form C{"language (country)"},
    into the language with iso code langcode, or the system language if no language is specified.

    @param langcode: Code of the language to translate names into.
    @return: A function taking an English language name and returning the
        translated, tidied-up name.
    """
    langfunc = gettext_lang(langcode)
    countryfunc = gettext_country(langcode)

    def handlelanguage(name):
        match = dialect_name_re.match(name)
        if match is None:
            return _fix_language_name(langfunc(name))
        # "Language (Country)": translate both halves separately.
        language, country = match.groups()
        return u"%s (%s)" % (_fix_language_name(langfunc(language)), countryfunc(country))

    return handlelanguage
264
265
# NOTE(review): the ``def`` line was missing from this listing; the name is
# taken from the calls in tr_lang() and the parameter from the body.
def _fix_language_name(name):
    """Identify and replace some unsightly names present in iso-codes.

    If the name is present in _fixed_names we assume it is untranslated and
    we replace it with a more usable rendering. If the remaining part is long
    and includes a semi-colon, we only take the text up to the semi-colon to
    keep things neat.

    @param name: A (possibly translated) language name.
    @return: The tidied-up name.
    """
    if name in _fixed_names:
        return _fixed_names[name]
    # Only names longer than 11 characters are shortened, and a semicolon
    # within the first 5 characters is ignored so a stub is never returned.
    if len(name) > 11:
        split_point = name.find(u';', 5)
        if split_point >= 0:
            return name[:split_point]
    return name
282
283
def gettext_lang(langcode=None):
    """Returns a gettext function to translate language names into the given
    language, or the system language if no language is specified.

    The function is cached in C{iso639} per language code; a missing code is
    cached under the empty string.

    @param langcode: Language code to translate into, or None/empty for the
        system language.
    @return: A callable mapping an English language name to its translation
        (the name is returned unchanged when no catalogue is found).
    """
    # Fold None into "" *before* the cache lookup: the entry is stored under
    # "", so testing the raw None would miss the cache on every call.
    if not langcode:
        langcode = ""
    if langcode not in iso639:
        if langcode:
            t = gettext.translation('iso_639', languages=[langcode], fallback=True)
        elif os.name == "nt":
            # On Windows the default locale is passed explicitly (presumably
            # gettext does not pick it up from the environment there --
            # confirm). It can be None, which must not be passed as [None].
            default_locale = locale.getdefaultlocale()[0]
            if default_locale:
                t = gettext.translation('iso_639', languages=[default_locale], fallback=True)
            else:
                t = gettext.translation('iso_639', fallback=True)
        else:
            t = gettext.translation('iso_639', fallback=True)
        # ugettext only exists on Python 2; fall back to gettext elsewhere.
        iso639[langcode] = getattr(t, 'ugettext', t.gettext)
    return iso639[langcode]
299
300
def gettext_country(langcode=None):
    """Returns a gettext function to translate country names into the given
    language, or the system language if no language is specified.

    The function is cached in C{iso3166} per language code; a missing code is
    cached under the empty string.

    @param langcode: Language code to translate into, or None/empty for the
        system language.
    @return: A callable mapping an English country name to its translation
        (the name is returned unchanged when no catalogue is found).
    """
    # Fold None into "" *before* the cache lookup: the entry is stored under
    # "", so testing the raw None would miss the cache on every call.
    if not langcode:
        langcode = ""
    if langcode not in iso3166:
        if langcode:
            t = gettext.translation('iso_3166', languages=[langcode], fallback=True)
        elif os.name == "nt":
            # On Windows the default locale is passed explicitly (presumably
            # gettext does not pick it up from the environment there --
            # confirm). It can be None, which must not be passed as [None].
            default_locale = locale.getdefaultlocale()[0]
            if default_locale:
                t = gettext.translation('iso_3166', languages=[default_locale], fallback=True)
            else:
                t = gettext.translation('iso_3166', fallback=True)
        else:
            t = gettext.translation('iso_3166', fallback=True)
        # ugettext only exists on Python 2; fall back to gettext elsewhere.
        iso3166[langcode] = getattr(t, 'ugettext', t.gettext)
    return iso3166[langcode]
316
317
# NOTE(review): the ``def`` line was missing from this listing; the name is
# taken from the call normalize(forceunicode(string)) and the "NFC" default
# from the docstring below.
def normalize(string, normal_form="NFC"):
    """Return a unicode string in its normalized form

    @param string: The string to be normalized
    @param normal_form: NFC (default), NFD, NFKC, NFKD
    @return: Normalized string
    """
    if string is None:
        return None
    return unicodedata.normalize(normal_form, string)
329
330
# NOTE(review): the ``def`` line was missing from this listing; the name is
# taken from the call normalize(forceunicode(string)) and the parameter from
# the body.
def forceunicode(string):
    """Ensures that the string is in unicode.

    @param string: A text string
    @type string: Unicode, String
    @return: String converted to Unicode and normalized as needed.
    @rtype: Unicode
    """
    if string is None:
        return None
    # ``bytes`` is ``str`` on Python 2 (2.6+), so this is the original check
    # there, and it also works on Python 3 where text ``str`` has no decode().
    if isinstance(string, bytes):
        # Honour an "encoding" attribute if the byte string carries one.
        encoding = getattr(string, "encoding", "utf-8")
        string = string.decode(encoding)
    elif isinstance(string, StringElem):
        try:
            text_type = unicode
        except NameError:  # Python 3: ``str`` is the unicode text type
            text_type = str
        string = text_type(string)
    return string
347
348
# NOTE(review): the ``def`` line was missing from this listing. The parameter
# comes from the body; the function name does not appear anywhere in the
# visible source -- confirm it against callers.
def normalized_unicode(string):
    """Forces the string to unicode and does normalization.

    @param string: A text or byte string, or None.
    @return: Whatever normalize() returns for the forceunicode() result.
    """
    return normalize(forceunicode(string))
352
353
358
359
# NOTE(review): the ``def`` line was missing from this listing; the name and
# first parameter come from the recursive call in the original body. The
# optional ``languages`` parameter (defaulting to the module-level dict) is
# backward-compatible whether or not the original had it -- confirm.
def simplify_to_common(language_code, languages=languages):
    """Simplify language code to the most commonly used form for the
    language, stripping country information for languages that tend
    not to be localized differently for different countries

    @param language_code: The language code to simplify.
    @param languages: Mapping whose keys are the known language codes.
    @return: The longest leading part of C{language_code} that is a known
        code, or the shortest form reached if none is known.
    """
    # Normalize the known codes once instead of on every simplification step.
    known_codes = set(normalize_code(key) for key in languages)
    # A None/empty code cannot be simplified; without this guard a None code
    # never reaches the "" terminator and would be retried forever.
    while language_code:
        simpler = simplercode(language_code)
        if normalize_code(language_code) in known_codes or simpler == "":
            return language_code
        language_code = simpler
    return language_code
369