Package translate :: Package storage :: Module lisa
[hide private]
[frames] | no frames]

Source Code for Module translate.storage.lisa

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2006-2009 Zuza Software Foundation 
  5  # 
  6  # This file is part of the Translate Toolkit. 
  7  # 
  8  # This program is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # This program is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program; if not, see <http://www.gnu.org/licenses/>. 
 20   
 21  """Parent class for LISA standards (TMX, TBX, XLIFF)""" 
 22   
 23  import re 
 24   
 25  try: 
 26      from lxml import etree 
 27      from translate.misc.xml_helpers import getText, getXMLlang, setXMLlang, \ 
 28                                             getXMLspace, setXMLspace, namespaced 
 29  except ImportError, e: 
 30      raise ImportError("lxml is not installed. It might be possible to continue without support for XML formats.") 
 31   
 32  from translate.storage import base 
 33  from translate.lang import data 
 34   
 35   
class LISAunit(base.TranslationUnit):
    """
    A single unit in the file.  Provisional work is done to make several
    languages possible.

    The unit wraps one lxml element (C{self.xmlelement}) whose children
    named C{languageNode} each hold the text for one language.  The first
    such child is treated as the source and the second as the target.
    """

    # The name of the root element of this unit type: (termEntry, tu,
    # trans-unit)
    rootNode = ""
    # The name of the per language element of this unit type: (termEntry, tu,
    # trans-unit)
    languageNode = ""
    # The name of the innermost element of this unit type: (term, seg)
    textNode = ""

    namespace = None
    _default_xml_space = "preserve"
    """The default handling of spacing in the absence of an xml:space
    attribute.

    This is mostly for correcting XLIFF behaviour."""

    def __init__(self, source, empty=False, **kwargs):
        """Constructs a unit containing the given source string.

        @param source: the source text, passed on to the base class
            constructor.
        @param empty: when true, no XML element is created and the base
            class constructor is not called; the caller is expected to
            attach C{xmlelement} itself (see L{createfromxmlElement}).
        @param kwargs: accepted and ignored by this class.
        """
        self._rich_source = None
        self._rich_target = None
        if empty:
            self._state_n = 0
            return
        self.xmlelement = etree.Element(self.namespaced(self.rootNode))
        #add descrip, note, etc.
        super(LISAunit, self).__init__(source)

    def __eq__(self, other):
        """Compares two units.

        Units that are not LISAunits are compared by the base class.  Two
        LISAunits are equal when they have the same number of language
        nodes and the text of each pair of nodes, in order, is equal.
        """
        if not isinstance(other, LISAunit):
            return super(LISAunit, self).__eq__(other)
        languageNodes = self.getlanguageNodes()
        otherlanguageNodes = other.getlanguageNodes()
        if len(languageNodes) != len(otherlanguageNodes):
            return False
        # Each side is read with its own xml:space handling so that the
        # comparison is symmetric (a == b gives the same answer as b == a).
        my_space = getXMLspace(self.xmlelement, self._default_xml_space)
        other_space = getXMLspace(other.xmlelement, other._default_xml_space)
        for mynode, othernode in zip(languageNodes, otherlanguageNodes):
            mytext = self.getNodeText(mynode, my_space)
            othertext = other.getNodeText(othernode, other_space)
            if mytext != othertext:
                #TODO:^ maybe we want to take children and notes into account
                return False
        return True

    def namespaced(self, name):
        """Returns name in Clark notation.

        For example namespaced("source") in an XLIFF document might return::
            {urn:oasis:names:tc:xliff:document:1.1}source
        This is needed throughout lxml.
        """
        return namespaced(self.namespace, name)

    def set_source_dom(self, dom_node):
        """Installs dom_node as the first language node of this unit.

        An existing first language node is replaced; otherwise dom_node is
        appended to the unit's element.
        """
        languageNodes = self.getlanguageNodes()
        if len(languageNodes) > 0:
            self.xmlelement.replace(languageNodes[0], dom_node)
        else:
            self.xmlelement.append(dom_node)

    def get_source_dom(self):
        """Returns the first language node, or None if there is none."""
        return self.getlanguageNode(lang=None, index=0)
    source_dom = property(get_source_dom, set_source_dom)

    def setsource(self, text, sourcelang='en'):
        """Sets the source text by building a fresh language node for it.

        @param text: the new source text (passed through
            C{data.forceunicode}).
        @param sourcelang: language code handed to L{createlanguageNode}.
        """
        # Any cached rich source no longer matches the new text.
        self._rich_source = None
        text = data.forceunicode(text)
        self.source_dom = self.createlanguageNode(sourcelang, text, "source")

    def getsource(self):
        """Returns the text of the first language node (None if absent)."""
        return self.getNodeText(self.source_dom,
                                getXMLspace(self.xmlelement,
                                            self._default_xml_space))
    source = property(getsource, setsource)

    def set_target_dom(self, dom_node, append=False):
        """Installs dom_node as a target language node.

        @param dom_node: the node to add, or None to only remove the
            current second language node.
        @param append: when true the node is appended after the existing
            language nodes; otherwise it is inserted at child position 1
            and the previous second language node (if any) is removed.
        """
        languageNodes = self.getlanguageNodes()
        # A source node must already exist before a target can be set.
        assert len(languageNodes) > 0
        if dom_node is not None:
            if append or len(languageNodes) == 0:
                self.xmlelement.append(dom_node)
            else:
                self.xmlelement.insert(1, dom_node)
        # languageNodes was collected before the insert, so index 1 is the
        # old target that the new node replaces.
        if not append and len(languageNodes) > 1:
            self.xmlelement.remove(languageNodes[1])

    def get_target_dom(self, lang=None):
        """Returns the language node for lang, or the second language node
        when no language is given.  Returns None if no such node exists."""
        if lang:
            return self.getlanguageNode(lang=lang)
        else:
            return self.getlanguageNode(lang=None, index=1)
    target_dom = property(get_target_dom)

    def settarget(self, text, lang='xx', append=False):
        """Sets the "target" string (second language), or alternatively
        appends to the list.

        @param text: the new target text; None removes the current target.
        @param lang: language code used when a new language node has to be
            created.
        @param append: passed on to L{set_target_dom} when a new node is
            created.
        """
        #XXX: we really need the language - can't really be optional, and we
        # need to propagate it
        # Any cached rich target no longer matches the new text.
        self._rich_target = None
        text = data.forceunicode(text)
        # Firstly deal with reinitialising to None or setting to identical
        # string
        if self.gettarget() == text:
            return
        languageNode = self.get_target_dom(None)
        if text is not None:
            if languageNode is None:
                languageNode = self.createlanguageNode(lang, text, "target")
                self.set_target_dom(languageNode, append)
            else:
                if self.textNode:
                    # Write into the first text node if there is one;
                    # otherwise fall back to the language node itself.
                    terms = languageNode.iter(self.namespaced(self.textNode))
                    try:
                        languageNode = terms.next()
                    except StopIteration:
                        pass
                languageNode.text = text
        else:
            self.set_target_dom(None, False)

    def gettarget(self, lang=None):
        """retrieves the "target" text (second entry), or the entry in the
        specified language, if it exists"""
        return self.getNodeText(self.get_target_dom(lang),
                                getXMLspace(self.xmlelement,
                                            self._default_xml_space))
    target = property(gettarget, settarget)

    def createlanguageNode(self, lang, text, purpose=None):
        """Returns a xml Element setup with given parameters to represent a
        single language entry. Has to be overridden."""
        return None

    def createPHnodes(self, parent, text):
        """Create the text node in parent containing all the ph tags.

        Text before the first match becomes C{parent.text}; each match
        becomes a C{ph} child element with a 1-based C{id} attribute, and
        text between or after matches is stored as the tail of the
        preceding C{ph} element.
        """
        # NOTE(review): _getPhMatches is not defined in the visible part of
        # this module - confirm it is provided elsewhere before relying on
        # this method.
        matches = _getPhMatches(text)
        if not matches:
            parent.text = text
            return

        # Now we know there will definitely be some ph tags
        start = matches[0].start()
        pretext = text[:start]
        if pretext:
            parent.text = pretext
        lasttag = parent
        for i, m in enumerate(matches):
            #pretext
            pretext = text[start:m.start()]
            # this will never happen with the first ph tag
            if pretext:
                lasttag.tail = pretext
            #ph node
            phnode = etree.SubElement(parent, self.namespaced("ph"))
            phnode.set("id", str(i+1))
            phnode.text = m.group()
            lasttag = phnode
            start = m.end()
        #post text
        if text[start:]:
            lasttag.tail = text[start:]

    def getlanguageNodes(self):
        """Returns a list of all nodes that contain per language information.
        """
        return list(self.xmlelement.iterchildren(self.namespaced(self.languageNode)))

    def getlanguageNode(self, lang=None, index=None):
        """Retrieves a languageNode either by language or by index.

        @param lang: language code to look for; takes precedence over
            index when given.
        @param index: position in the list of language nodes.
        @return: the matching node, or None when nothing matches or the
            index is past the end.
        @raise KeyError: when neither lang nor index is given.
        """
        if lang is None and index is None:
            raise KeyError("No criterea for languageNode given")
        languageNodes = self.getlanguageNodes()
        if lang:
            for node in languageNodes:
                if getXMLlang(node) == lang:
                    return node
        else:  # have to use index
            if index >= len(languageNodes):
                return None
            else:
                return languageNodes[index]
        return None

    def getNodeText(self, languageNode, xml_space="preserve"):
        """Retrieves the term from the given languageNode.

        @param languageNode: the per-language element, or None.
        @param xml_space: spacing mode handed to C{getText}.
        @return: the text, or None when languageNode is None or when this
            unit type uses a text node and the language node has none.
        """
        if languageNode is None:
            return None
        if self.textNode:
            terms = languageNode.iterdescendants(self.namespaced(self.textNode))
            # iterdescendants() always returns an iterator, so an absent
            # text node shows up as StopIteration rather than as None.
            try:
                term = terms.next()
            except StopIteration:
                return None
            return getText(term, xml_space)
        else:
            return getText(languageNode, xml_space)

    def __str__(self):
        """Returns the unit's XML, pretty printed and UTF-8 encoded."""
        return etree.tostring(self.xmlelement, pretty_print=True,
                              encoding='utf-8')

    def _set_property(self, name, value):
        """Sets attribute name on the unit's element to value."""
        self.xmlelement.attrib[name] = value

    # The namespaced 'xid' attribute of the unit's element.
    xid = property(lambda self: self.xmlelement.attrib[self.namespaced('xid')],
                   lambda self, value: self._set_property(self.namespaced('xid'), value))

    # The namespaced 'rid' attribute of the unit's element.
    rid = property(lambda self: self.xmlelement.attrib[self.namespaced('rid')],
                   lambda self, value: self._set_property(self.namespaced('rid'), value))

    def createfromxmlElement(cls, element):
        """Alternate constructor: wraps an existing XML element in a unit
        without creating a new element."""
        term = cls(None, empty=True)
        term.xmlelement = element
        return term
    createfromxmlElement = classmethod(createfromxmlElement)
260 261
class LISAfile(base.TranslationStore):
    """A class representing a file store for one of the LISA file formats."""
    UnitClass = LISAunit
    #The root node of the XML document:
    rootNode = ""
    #The root node of the content section:
    bodyNode = ""
    #The XML skeleton to use for empty construction:
    XMLskeleton = ""

    namespace = None

    def __init__(self, inputfile=None, sourcelanguage='en',
                 targetlanguage=None, unitclass=None):
        """Creates a store, either from inputfile or from the XML skeleton.

        @param inputfile: XML string or file-like object to parse; when
            None an empty document is built from C{XMLskeleton}.
        @param sourcelanguage: source language set on a newly built
            (skeleton) document.
        @param targetlanguage: target language set on a newly built
            (skeleton) document.
        @param unitclass: passed on to the base class constructor.
        """
        super(LISAfile, self).__init__(unitclass=unitclass)
        if inputfile is not None:
            self.parse(inputfile)
            assert self.document.getroot().tag == self.namespaced(self.rootNode)
        else:
            # We strip out newlines to ensure that spaces in the skeleton
            # don't interfere with the pretty printing of lxml
            self.parse(self.XMLskeleton.replace("\n", ""))
            self.setsourcelanguage(sourcelanguage)
            self.settargetlanguage(targetlanguage)
            self.addheader()
        self._encoding = "UTF-8"

    def addheader(self):
        """Method to be overridden to initialise headers, etc."""
        pass

    def namespaced(self, name):
        """Returns name in Clark notation.

        For example namespaced("source") in an XLIFF document might return::
            {urn:oasis:names:tc:xliff:document:1.1}source
        This is needed throughout lxml.
        """
        return namespaced(self.namespace, name)

    def initbody(self):
        """Initialises self.body so it never needs to be retrieved from the
        XML again."""
        # The default (un-prefixed) namespace of the root element, if any.
        self.namespace = self.document.getroot().nsmap.get(None, None)
        # './/' is the explicit spelling of the descendant search; lxml
        # rewrites a leading '/' to './' itself and warns about it.
        self.body = self.document.find('.//%s' % self.namespaced(self.bodyNode))

    def addsourceunit(self, source):
        #TODO: maybe this should rather be called addsourcestring or
        # something?
        """Adds and returns a new unit with the given string as first entry."""
        newunit = self.UnitClass(source)
        self.addunit(newunit)
        return newunit

    def addunit(self, unit, new=True):
        """Adds unit to the store.

        @param unit: the unit to add; its namespace is set to the store's.
        @param new: when true the unit's element is also appended to the
            document body; pass False for units whose element is already
            part of the document.
        """
        unit.namespace = self.namespace
        super(LISAfile, self).addunit(unit)
        if new:
            self.body.append(unit.xmlelement)

    def __str__(self):
        """Converts to a string containing the file's XML"""
        return etree.tostring(self.document, pretty_print=True,
                              xml_declaration=True, encoding='utf-8')

    def parse(self, xml):
        """Populates this object from the given xml string.

        @param xml: the XML as a string, or a file-like object, which is
            rewound and read in full.
        """
        if not hasattr(self, 'filename'):
            self.filename = getattr(xml, 'name', '')
        if hasattr(xml, "read"):
            xml.seek(0)
            xml = xml.read()
        if etree.LXML_VERSION >= (2, 1, 0):
            #Since version 2.1.0 we can pass the strip_cdata parameter to
            #indicate that we don't want cdata to be converted to raw XML
            parser = etree.XMLParser(strip_cdata=False)
        else:
            parser = etree.XMLParser()
        self.document = etree.fromstring(xml, parser).getroottree()
        self._encoding = self.document.docinfo.encoding
        self.initbody()
        assert self.document.getroot().tag == self.namespaced(self.rootNode)
        # Wrap every unit element already in the document; new=False
        # because those elements must not be appended to the body again.
        for entry in self.document.getroot().iterdescendants(self.namespaced(self.UnitClass.rootNode)):
            term = self.UnitClass.createfromxmlElement(entry)
            self.addunit(term, new=False)
347