Package translate :: Package lang :: Module data
[hide private]
[frames] | no frames]

Source Code for Module translate.lang.data

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  #  
  4  # Copyright 2007-2009 Zuza Software Foundation 
  5  #  
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  #  
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21   
 22  """This module stores information and functionality that relates to plurals.""" 
 23   
 24  import unicodedata 
 25   
 26  from translate.storage.placeables import StringElem 
 27   
 28   
# Table of known languages, keyed by language code (optionally carrying a
# country code such as "pt_BR" or a modifier such as "ca@valencia").
# Each value is a 3-tuple:
#   (English name as found in iso-codes, number of plural forms,
#    plural equation: a C-style expression in n giving the plural form index)
languages = {
'af': (u'Afrikaans', 2, '(n != 1)'),
'ak': (u'Akan', 2, 'n > 1'),
'am': (u'Amharic', 2, 'n > 1'),
'an': (u'Aragonese', 2, '(n != 1)'),
'ar': (u'Arabic', 6, 'n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=11 ? 4 : 5'),
'arn': (u'Mapudungun; Mapuche', 2, 'n > 1'),
'ast': (u'Asturian; Bable; Leonese; Asturleonese', 2, '(n != 1)'),
'az': (u'Azerbaijani', 2, '(n != 1)'),
'be': (u'Belarusian', 3, 'n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'),
'bg': (u'Bulgarian', 2, '(n != 1)'),
'bn': (u'Bengali', 2, '(n != 1)'),
'bn_IN': (u'Bengali (India)', 2, '(n != 1)'),
'bo': (u'Tibetan', 1, '0'),
'br': (u'Breton', 2, 'n > 1'),
'bs': (u'Bosnian', 3, 'n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'),
'ca': (u'Catalan; Valencian', 2, '(n != 1)'),
'ca@valencia': (u'Catalan; Valencian (Valencia)', 2, '(n != 1)'),
'cs': (u'Czech', 3, '(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2'),
'csb': (u'Kashubian', 3, 'n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'),
'cy': (u'Welsh', 2, '(n==2) ? 1 : 0'),
'da': (u'Danish', 2, '(n != 1)'),
'de': (u'German', 2, '(n != 1)'),
'dz': (u'Dzongkha', 1, '0'),
'el': (u'Greek, Modern (1453-)', 2, '(n != 1)'),
'en': (u'English', 2, '(n != 1)'),
'en_GB': (u'English (United Kingdom)', 2, '(n != 1)'),
'en_ZA': (u'English (South Africa)', 2, '(n != 1)'),
'eo': (u'Esperanto', 2, '(n != 1)'),
'es': (u'Spanish; Castilian', 2, '(n != 1)'),
'et': (u'Estonian', 2, '(n != 1)'),
'eu': (u'Basque', 2, '(n != 1)'),
'fa': (u'Persian', 1, '0'),
'fi': (u'Finnish', 2, '(n != 1)'),
'fil': (u'Filipino; Pilipino', 2, '(n > 1)'),
'fo': (u'Faroese', 2, '(n != 1)'),
'fr': (u'French', 2, '(n > 1)'),
'fur': (u'Friulian', 2, '(n != 1)'),
'fy': (u'Frisian', 2, '(n != 1)'),
'ga': (u'Irish', 3, 'n==1 ? 0 : n==2 ? 1 : 2'),
'gl': (u'Galician', 2, '(n != 1)'),
'gu': (u'Gujarati', 2, '(n != 1)'),
'gun': (u'Gun', 2, '(n > 1)'),
'ha': (u'Hausa', 2, '(n != 1)'),
'he': (u'Hebrew', 2, '(n != 1)'),
'hi': (u'Hindi', 2, '(n != 1)'),
'hy': (u'Armenian', 1, '0'),
'hr': (u'Croatian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
'hu': (u'Hungarian', 2, '(n != 1)'),
'id': (u'Indonesian', 1, '0'),
'is': (u'Icelandic', 2, '(n != 1)'),
'it': (u'Italian', 2, '(n != 1)'),
'ja': (u'Japanese', 1, '0'),
'jv': (u'Javanese', 2, '(n != 1)'),
'ka': (u'Georgian', 1, '0'),
'km': (u'Central Khmer', 1, '0'),
'kn': (u'Kannada', 2, '(n != 1)'),
'ko': (u'Korean', 1, '0'),
'ku': (u'Kurdish', 2, '(n != 1)'),
'kw': (u'Cornish', 4, '(n==1) ? 0 : (n==2) ? 1 : (n == 3) ? 2 : 3'),
'ky': (u'Kirghiz; Kyrgyz', 1, '0'),
'lb': (u'Luxembourgish; Letzeburgesch', 2, '(n != 1)'),
'ln': (u'Lingala', 2, '(n > 1)'),
'lo': (u'Lao', 1, '0'),
'lt': (u'Lithuanian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2)'),
'lv': (u'Latvian', 3, '(n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2)'),
'mg': (u'Malagasy', 2, '(n > 1)'),
'mi': (u'Maori', 2, '(n > 1)'),
'mk': (u'Macedonian', 2, 'n==1 || n%10==1 ? 0 : 1'),
'ml': (u'Malayalam', 2, '(n != 1)'),
'mn': (u'Mongolian', 2, '(n != 1)'),
'mr': (u'Marathi', 2, '(n != 1)'),
'ms': (u'Malay', 1, '0'),
'mt': (u'Maltese', 4, '(n==1 ? 0 : n==0 || ( n%100>1 && n%100<11) ? 1 : (n%100>10 && n%100<20 ) ? 2 : 3)'),
'nah': (u'Nahuatl languages', 2, '(n != 1)'),
'nap': (u'Neapolitan', 2, '(n != 1)'),
'nb': (u'Bokmål, Norwegian; Norwegian Bokmål', 2, '(n != 1)'),
'ne': (u'Nepali', 2, '(n != 1)'),
'nl': (u'Dutch; Flemish', 2, '(n != 1)'),
'nn': (u'Norwegian Nynorsk; Nynorsk, Norwegian', 2, '(n != 1)'),
'nso': (u'Pedi; Sepedi; Northern Sotho', 2, '(n > 1)'),
'oc': (u'Occitan (post 1500)', 2, '(n > 1)'),
'or': (u'Oriya', 2, '(n != 1)'),
'pa': (u'Panjabi; Punjabi', 2, '(n != 1)'),
'pap': (u'Papiamento', 2, '(n != 1)'),
'pl': (u'Polish', 3, '(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
'pms': (u'Piemontese', 2, '(n != 1)'),
'ps': (u'Pushto; Pashto', 2, '(n != 1)'),
'pt': (u'Portuguese', 2, '(n != 1)'),
'pt_BR': (u'Portuguese (Brazil)', 2, '(n > 1)'),
'rm': (u'Romansh', 2, '(n != 1)'),
# NOTE(review): this is the only equation that ends with ';' - confirm
# whether consumers expect or strip the trailing semicolon.
'ro': (u'Romanian', 3, '(n==1 ? 0 : (n==0 || (n%100 > 0 && n%100 < 20)) ? 1 : 2);'),
'ru': (u'Russian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
'sco': (u'Scots', 2, '(n != 1)'),
'si': (u'Sinhala; Sinhalese', 2, '(n != 1)'),
'sk': (u'Slovak', 3, '(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2'),
'sl': (u'Slovenian', 4, '(n%100==1 ? 0 : n%100==2 ? 1 : n%100==3 || n%100==4 ? 2 : 3)'),
'so': (u'Somali', 2, '(n != 1)'),
'sq': (u'Albanian', 2, '(n != 1)'),
'sr': (u'Serbian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
'st': (u'Sotho, Southern', 2, '(n != 1)'),
'su': (u'Sundanese', 1, '0'),
'sv': (u'Swedish', 2, '(n != 1)'),
'sw': (u'Swahili', 2, '(n != 1)'),
'ta': (u'Tamil', 2, '(n != 1)'),
'te': (u'Telugu', 2, '(n != 1)'),
'tg': (u'Tajik', 2, '(n != 1)'),
'ti': (u'Tigrinya', 2, '(n > 1)'),
'th': (u'Thai', 1, '0'),
'tk': (u'Turkmen', 2, '(n != 1)'),
'tr': (u'Turkish', 1, '0'),
'tt': (u'Tatar', 1, '0'),
'uk': (u'Ukrainian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
'vi': (u'Vietnamese', 1, '0'),
'wa': (u'Walloon', 2, '(n > 1)'),
# Chinese is difficult because the main divide is on script, not really
# country. Simplified Chinese is used mostly in China, Singapore and Malaysia.
# Traditional Chinese is used mostly in Hong Kong, Taiwan and Macau.
'zh_CN': (u'Chinese (China)', 1, '0'),
'zh_HK': (u'Chinese (Hong Kong)', 1, '0'),
'zh_TW': (u'Chinese (Taiwan)', 1, '0'),
'zu': (u'Zulu', 2, '(n != 1)'),
}
"""Dictionary of language data.
The language code is the dictionary key (which may contain country codes and modifiers).
The value is a tuple: (Full name in English from iso-codes, nplurals, plural equation).

Note that the English names should not be used in user facing places - it
should always be passed through the function returned from tr_lang(), or at
least passed through _fix_language_name()."""
159   
# Replacement table used by _fix_language_name(): maps a verbose language
# name (several alternatives joined with ";" or carrying a qualifier) to a
# single, shorter name that is more suitable for display.
_fixed_names = {
        u"Asturian; Bable; Leonese; Asturleonese": u"Asturian",
        u"Bokmål, Norwegian; Norwegian Bokmål": u"Norwegian Bokmål",
        u"Catalan; Valencian": u"Catalan",
        u"Central Khmer": u"Khmer",
        u"Chichewa; Chewa; Nyanja": u"Chewa; Nyanja",
        u"Divehi; Dhivehi; Maldivian": u"Divehi",
        u"Dutch; Flemish": u"Dutch",
        u"Filipino; Pilipino": u"Filipino",
        u"Greek, Modern (1453-)": u"Greek",
        u"Kirghiz; Kyrgyz": u"Kirghiz",
        u"Klingon; tlhIngan-Hol": u"Klingon",
        u"Limburgan; Limburger; Limburgish": u"Limburgish",
        u"Low German; Low Saxon; German, Low; Saxon, Low": u"Low German",
        u"Luxembourgish; Letzeburgesch": u"Luxembourgish",
        u"Ndebele, South; South Ndebele": u"Southern Ndebele",
        u"Norwegian Nynorsk; Nynorsk, Norwegian": u"Norwegian Nynorsk",
        u"Occitan (post 1500)": u"Occitan",
        u"Panjabi; Punjabi": u"Punjabi",
        u"Pedi; Sepedi; Northern Sotho": u"Northern Sotho",
        u"Pushto; Pashto": u"Pashto",
        u"Sinhala; Sinhalese": u"Sinhala",
        u"Sotho, Southern": u"Sotho",
        u"Spanish; Castilian": u"Spanish",
        u"Uighur; Uyghur": u"Uighur",
}
186   
def simplercode(code):
    """Return C{code} with its last component (e.g. the country) removed.

    The position of the last separator is looked up in the normalized form
    of the code (see L{normalize_code}) and the original string is cut at
    that position. A false value (C{None}, C{""}) is returned unchanged, and
    a code without any separator yields C{""}.

    @see:
        - U{http://www.rfc-editor.org/rfc/bcp/bcp47.txt}
        - U{http://www.rfc-editor.org/rfc/rfc4646.txt}
        - U{http://www.rfc-editor.org/rfc/rfc4647.txt}
        - U{http://www.w3.org/International/articles/language-tags/}
    """
    if code:
        cut = normalize_code(code).rfind('-')
        if cut < 0:
            # Nothing left to strip: signal this with an empty string.
            return ""
        return code[:cut]
    return code
# Keyed by language code.
# NOTE(review): values look like fractional length changes relative to the
# source text (negative = shorter) - confirm against the users of this table.
expansion_factors = {
        'af': 0.1,
        'ar': -0.09,
        'es': 0.21,
        'fr': 0.28,
        'it': 0.2,
}
"""Source to target string length expansion factors."""

import gettext
import locale
import re
import os

# Cache filled by gettext_lang(): key -> gettext function of the 'iso_639'
# translation domain.
iso639 = {}
"""ISO 639 language codes"""
# Cache filled by gettext_country(): key -> gettext function of the
# 'iso_3166' translation domain.
iso3166 = {}
"""ISO 3166 country codes"""

# A complete language code: 2-3 lowercase letters, optionally followed by
# "_" or "-" plus 2-3 uppercase letters, optionally followed by "@modifier".
langcode_re = re.compile("^[a-z]{2,3}([_-][A-Z]{2,3}|)(@[a-zA-Z0-9]+|)$")
# What may follow the language part of a code: a mandatory "_"/"-" plus 2-3
# uppercase letters, optionally followed by "@modifier".
variant_re = re.compile("^[_-][A-Z]{2,3}(@[a-zA-Z0-9]+|)$")
def languagematch(languagecode, otherlanguagecode):
    """Check whether C{otherlanguagecode} is C{languagecode}, optionally
    extended with a region/modifier suffix.

    If C{languagecode} is C{None}, the result only says whether
    C{otherlanguagecode} looks like a language code at all.

    @return: A true value (C{True} or a regex match object) on a match,
        a false value (C{False} or C{None}) otherwise.
    """
    if languagecode is None:
        return langcode_re.match(otherlanguagecode)
    if languagecode == otherlanguagecode:
        return True
    if not otherlanguagecode.startswith(languagecode):
        return False
    # Same prefix: what follows must be a well-formed variant suffix.
    remainder = otherlanguagecode[len(languagecode):]
    return variant_re.match(remainder)
# Splits a name of the form "Language (Qualifier)" into two groups: the text
# before the trailing parenthesised part, and the text inside the
# parentheses. The parenthesised part may contain neither digits nor ")",
# so a name such as "Greek, Modern (1453-)" does not match.
dialect_name_re = re.compile(r"(.+)\s\(([^)\d]+)\)$")
def tr_lang(langcode=None):
    """Build a translator for language names.

    The returned function takes an English language name, also of the form
    C{"language (country)"}, and renders it in the language identified by
    C{langcode} (or the system language when no code is given).

    @param langcode: ISO code of the language to translate names into.
    @return: A function taking a name and returning its translation.
    """
    translate_language = gettext_lang(langcode)
    translate_country = gettext_country(langcode)

    def handlelanguage(name):
        found = dialect_name_re.match(name)
        if not found:
            # Plain language name without a "(country)" part.
            return _fix_language_name(translate_language(name))
        language, country = found.groups()
        fixed = _fix_language_name(translate_language(language))
        return u"%s (%s)" % (fixed, translate_country(country))

    return handlelanguage
def _fix_language_name(name):
    """Swap an unwieldy iso-codes language name for a nicer one.

    A name that is listed in C{_fixed_names} is taken to be untranslated and
    is replaced by the listed rendering; any other name is returned as is.
    """
    if name in _fixed_names:
        return _fixed_names[name]
    return name
261 262
def gettext_lang(langcode=None):
    """Returns a gettext function to translate language names into the given
    language, or the system language if no language is specified.

    The function is the C{ugettext} method of a translation for the
    C{iso_639} domain (created with C{fallback=True}) and is cached in
    C{iso639}.

    @param langcode: Code of the language to translate into; a false value
        (C{None}, C{""}) selects the system language.
    @return: The cached translation function.
    """
    # The default language is stored under "", so the cache has to be
    # consulted with that same key; checking with None would never hit.
    key = langcode or ""
    if key not in iso639:
        if key:
            t = gettext.translation('iso_639', languages=[key], fallback=True)
        else:
            default = None
            if os.name == "nt":
                # On Windows the default locale is not used for some reason
                default = locale.getdefaultlocale()[0]
            if default:
                t = gettext.translation('iso_639', languages=[default], fallback=True)
            else:
                # Also covers an undetermined Windows locale (None), which
                # must not be handed to gettext as a language.
                t = gettext.translation('iso_639', fallback=True)
        iso639[key] = t.ugettext
    return iso639[key]
278
def gettext_country(langcode=None):
    """Returns a gettext function to translate country names into the given
    language, or the system language if no language is specified.

    The function is the C{ugettext} method of a translation for the
    C{iso_3166} domain (created with C{fallback=True}) and is cached in
    C{iso3166}.

    @param langcode: Code of the language to translate into; a false value
        (C{None}, C{""}) selects the system language.
    @return: The cached translation function.
    """
    # The default language is stored under "", so the cache has to be
    # consulted with that same key; checking with None would never hit.
    key = langcode or ""
    if key not in iso3166:
        if key:
            t = gettext.translation('iso_3166', languages=[key], fallback=True)
        else:
            default = None
            if os.name == "nt":
                # On Windows the default locale is not used for some reason
                default = locale.getdefaultlocale()[0]
            if default:
                t = gettext.translation('iso_3166', languages=[default], fallback=True)
            else:
                # Also covers an undetermined Windows locale (None), which
                # must not be handed to gettext as a language.
                t = gettext.translation('iso_3166', fallback=True)
        iso3166[key] = t.ugettext
    return iso3166[key]
294
def normalize(string, normal_form="NFC"):
    """Apply Unicode normalization to a unicode string.

    @param string: The string to be normalized; C{None} is passed through.
    @param normal_form: NFC (default), NFD, NFKC, NFKD
    @return: The normalized string, or C{None} if C{string} is C{None}.
    """
    if string is not None:
        return unicodedata.normalize(normal_form, string)
    return None
306
def forceunicode(string):
    """Convert the given text to unicode where a conversion is known.

    A C{str} is decoded using its C{encoding} attribute when it has one and
    UTF-8 otherwise; a C{StringElem} is converted with C{unicode()}. C{None}
    and any other value are returned unchanged. No normalization is done
    here.

    @param string: A text string
    @type string: Unicode, String
    @return: The text as unicode.
    @rtype: Unicode
    """
    if string is None:
        return None
    if isinstance(string, str):
        return string.decode(getattr(string, "encoding", "utf-8"))
    if isinstance(string, StringElem):
        return unicode(string)
    return string
323
def normalized_unicode(string):
    """Convert the string with L{forceunicode} and then apply L{normalize}
    with its default normal form."""
    as_unicode = forceunicode(string)
    return normalize(as_unicode)
327
def normalize_code(code):
    """Return the language code in a canonical spelling for comparisons:
    C{"_"} and C{"@"} are replaced by C{"-"} and the result is lowercased.
    """
    for separator in ("_", "@"):
        code = code.replace(separator, "-")
    return code.lower()
330
def simplify_to_common(language_code, languages=languages):
    """Simplify language code to the most commonly used form for the
    language, stripping country information for languages that tend
    not to be localized differently for different countries.

    Components are stripped from the end of the code (see L{simplercode})
    until the code, compared in normalized form, is one of the keys of
    C{languages}. If no such code is found, the shortest form reached is
    returned.

    @param language_code: The code to simplify. A false value (C{None},
        C{""}) is returned unchanged.
    @param languages: Mapping whose keys are the known language codes;
        defaults to the module level table.
    @return: The simplified language code.
    """
    if not language_code:
        return language_code
    # Normalize the known codes once instead of once per stripped component.
    known = set(normalize_code(key) for key in languages)
    # Iterating keeps using the caller's table; the former recursive call
    # silently fell back to the default one.
    while True:
        if normalize_code(language_code) in known:
            return language_code
        simpler = simplercode(language_code)
        if simpler == "":
            return language_code
        language_code = simpler
340