1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """classes that hold units of .dtd files (dtdunit) or entire files (dtdfile)
23 these are specific .dtd files for localisation used by mozilla"""
24
25 from translate.storage import base
26 from translate.misc import quote
27
28 import re
29 import warnings
30 try:
31 from lxml import etree
32 import StringIO
33 except ImportError:
34 etree = None
35
labelsuffixes = (".label", ".title")
"""Label suffixes: entries with this suffix are able to be combined with accesskeys
found in entries ending with L{accesskeysuffixes}"""
accesskeysuffixes = (".accesskey", ".accessKey", ".akey")
"""Accesskey Suffixes: entries with this suffix may be combined with labels
ending in L{labelsuffixes} into accelerator notation"""
42
51
64
66 """Find and remove ampersands that are not part of an entity definition.
67
68 A stray & in a DTD file can break an application's ability to parse the file. In Mozilla
69 localisation this is very important and these can break the parsing of files used in XUL
70 and thus break interface rendering. Tracking down the problem is very difficult,
71 thus by removing potential broken & and warning the users we can ensure that the output
72 DTD will always be parsable.
73
74 @type name: String
75 @param name: Entity name
76 @type value: String
77 @param value: Entity text value
78 @rtype: String
79 @return: Entity value without bad ampersands
80 """
def is_valid_entity_name(name):
    """Check that supplied L{name} is a valid entity name.

    A name is accepted when it is alphanumeric once all dots are removed
    (for example "amp" or "brand.name"), or when it is a "#" followed by
    alphanumeric characters (for example "#38").

    @type name: String
    @param name: Text found between an ampersand and the following semicolon
    @rtype: Boolean
    @return: True if the name is acceptable, otherwise False
    """
    if name.replace('.', '').isalnum():
        return True
    # startswith() rather than name[0]: an empty name (as produced by "&;")
    # must be rejected instead of raising IndexError.
    if name.startswith('#') and name[1:].isalnum():
        return True
    return False
88
89 amppos = 0
90 invalid_amps = []
91 while amppos >= 0:
92 amppos = value.find("&", amppos)
93 if amppos != -1:
94 amppos += 1
95 semipos = value.find(";", amppos)
96 if semipos != -1:
97 if is_valid_entity_name(value[amppos:semipos]):
98 continue
99 invalid_amps.append(amppos-1)
100 if len(invalid_amps) > 0:
101 warnings.warn("invalid ampersands in dtd entity %s" % (name))
102 adjustment = 0
103 for amppos in invalid_amps:
104 value = value[:amppos-adjustment] + value[amppos-adjustment+1:]
105 adjustment += 1
106 return value
107
108 -class dtdunit(base.TranslationUnit):
109 """this class represents an entity definition from a dtd file (and possibly associated comments)"""
111 """construct the dtdunit, prepare it for parsing"""
112 super(dtdunit, self).__init__(source)
113 self.comments = []
114 self.unparsedlines = []
115 self.incomment = False
116 self.inentity = False
117 self.entity = "FakeEntityOnlyForInitialisationAndTesting"
118 self.source = source
119
120
122 """Sets the definition to the quoted value of source"""
123 self.definition = quotefordtd(source)
124 self._rich_source = None
125
127 """gets the unquoted source string"""
128 return unquotefromdtd(self.definition)
129 source = property(getsource, setsource)
130
132 """Sets the definition to the quoted value of target"""
133 if target is None:
134 target = ""
135 self.definition = quotefordtd(target)
136 self._rich_target = None
137
139 """gets the unquoted target string"""
140 return unquotefromdtd(self.definition)
141 target = property(gettarget, settarget)
142
144 """returns whether this dtdunit doesn't actually have an entity definition"""
145
146
147 return self.entity is None
148
def parse(self, dtdsrc):
    """Read the first dtd element from the source code into this object.

    Walks dtdsrc line by line, collecting comments, lines that are neither
    comment nor entity, and at most one <!ENTITY ...> declaration.  The loop
    stops as soon as the quoted definition of that entity has been closed.

    self.incomment and self.inentity are read here but not reset, so they
    carry whatever state the object had before the call.

    @type dtdsrc: String
    @param dtdsrc: DTD source text made of newline-separated lines
    @rtype: Integer
    @return: Number of lines consumed (0 when dtdsrc is empty)
    @raise ValueError: if the first character of an entity definition is
        neither a single nor a double quote
    """
    self.comments = []
    # The note lists below are all aliases of self.comments, so every kind
    # of comment is stored in one list, in the order it was encountered.
    self.locfilenotes = self.comments
    self.locgroupstarts = self.comments
    self.locgroupends = self.comments
    self.locnotes = self.comments





    # entity stays None unless an <!ENTITY declaration is found below.
    self.entity = None
    self.definition = ''
    if not dtdsrc:
        return 0
    lines = dtdsrc.split("\n")
    linesprocessed = 0
    comment = ""
    for line in lines:
        # split() removed the line endings; put one back on every line.
        line += "\n"
        linesprocessed += 1

        if not self.incomment:
            if (line.find('<!--') != -1):
                self.incomment = True
                self.continuecomment = False
                # Peek at the comment text only to classify it; the state
                # returned here is discarded and the real extraction is
                # done in the "if self.incomment" section below.
                (comment, dummy) = quote.extract(line, "<!--", "-->", None, 0)
                if comment.find('LOCALIZATION NOTE') != -1:
                    # presumably findend returns the index just past the
                    # marker text -- TODO confirm against translate.misc.quote
                    l = quote.findend(comment,'LOCALIZATION NOTE')
                    # skip the spaces that follow the marker
                    while (comment[l] == ' '):
                        l += 1
                    # the keyword directly after the marker selects the note type
                    if comment.find('FILE', l) == l:
                        self.commenttype = "locfile"
                    elif comment.find('BEGIN', l) == l:
                        self.commenttype = "locgroupstart"
                    elif comment.find('END', l) == l:
                        self.commenttype = "locgroupend"
                    else:
                        self.commenttype = "locnote"
                else:
                    # plain comment without a LOCALIZATION NOTE marker
                    self.commenttype = "comment"

            elif not self.inentity and re.search("%.*;", line):
                # A line outside any comment or entity that contains "%...;"
                # is kept verbatim as a comment and not parsed any further.
                self.comments.append(("comment", line))
                line = ""
                continue

        if self.incomment:
            # Extract the comment text; the second result says whether the
            # comment is still open at the end of this line.
            (comment, self.incomment) = quote.extract(line, "<!--", "-->", None, self.continuecomment)
            # a comment left open here continues on the next line
            self.continuecomment = self.incomment
            # remove the comment text so only the remainder is parsed below
            line = line.replace(comment, "", 1)
            # the comment was closed on this line
            if not self.incomment:
                if line.isspace():
                    # nothing but whitespace remains: fold it into the comment
                    comment += line
                    line = ''
                else:
                    comment += '\n'






            # store the comment together with the type worked out above
            commentpair = (self.commenttype, comment)
            if self.commenttype == "locfile":
                self.locfilenotes.append(commentpair)
            elif self.commenttype == "locgroupstart":
                self.locgroupstarts.append(commentpair)
            elif self.commenttype == "locgroupend":
                self.locgroupends.append(commentpair)
            elif self.commenttype == "locnote":
                self.locnotes.append(commentpair)
            elif self.commenttype == "comment":
                self.comments.append(commentpair)

        if not self.inentity and not self.incomment:
            entitypos = line.find('<!ENTITY')
            if entitypos != -1:
                self.inentity = True
                # text before the declaration is remembered only when it
                # starts with "#"
                beforeentity = line[:entitypos].strip()
                if beforeentity.startswith("#"):
                    self.hashprefix = beforeentity
                self.entitypart = "start"
            else:
                # neither comment nor entity: keep the line untouched
                self.unparsedlines.append(line)

        if self.inentity:
            # entitypart moves through "start" -> "name" ->
            # ("parameter" ->) "definition"; several of these steps can
            # happen on the same line.
            if self.entitypart == "start":
                # drop everything up to and including '<!ENTITY'
                e = quote.findend(line,'<!ENTITY')
                line = line[e:]
                self.entitypart = "name"
                self.entitytype = "internal"
            if self.entitypart == "name":
                e = 0
                while (e < len(line) and line[e].isspace()):
                    e += 1
                self.entity = ''
                # a leading '%' marks the entity as "external"
                if (e < len(line) and line[e] == '%'):
                    self.entitytype = "external"
                    self.entityparameter = ""
                    e += 1
                    while (e < len(line) and line[e].isspace()):
                        e += 1
                # the name is everything up to the next whitespace
                while (e < len(line) and not line[e].isspace()):
                    self.entity += line[e]
                    e += 1
                while (e < len(line) and line[e].isspace()):
                    e += 1
                if self.entity:
                    if self.entitytype == "external":
                        self.entitypart = "parameter"
                    else:
                        self.entitypart = "definition"
                    # remember the start position and the quote character
                    if e == len(line):
                        # nothing left on this line: continue on the next one
                        self.entityhelp = None
                        e = 0
                        continue
                    elif self.entitypart == "definition":
                        self.entityhelp = (e, line[e])
                        self.instring = False
            if self.entitypart == "parameter":
                while (e < len(line) and line[e].isspace()):
                    e += 1
                paramstart = e
                # the parameter is the following run of alphanumeric characters
                while (e < len(line) and line[e].isalnum()):
                    e += 1
                self.entityparameter += line[paramstart:e]
                while (e < len(line) and line[e].isspace()):
                    e += 1
                line = line[e:]
                e = 0
                if not line:
                    continue
                # a quote right after the parameter starts the definition
                if line[0] in ('"', "'"):
                    self.entitypart = "definition"
                    self.entityhelp = (e, line[e])
                    self.instring = False
            if self.entitypart == "definition":
                if self.entityhelp is None:
                    # the definition starts on a later line than the name:
                    # find its first non-whitespace character
                    e = 0
                    while (e < len(line) and line[e].isspace()):
                        e += 1
                    if e == len(line):
                        continue
                    self.entityhelp = (e, line[e])
                    self.instring = False
                # entityhelp is (start offset in this line, quote character)
                e = self.entityhelp[0]
                if (self.entityhelp[1] == "'"):
                    (defpart, self.instring) = quote.extract(line[e:], "'", "'", startinstring=self.instring, allowreentry=False)
                elif (self.entityhelp[1] == '"'):
                    (defpart, self.instring) = quote.extract(line[e:], '"', '"', startinstring=self.instring, allowreentry=False)
                else:
                    raise ValueError("Unexpected quote character... %r" % (self.entityhelp[1]))
                # any following line is read from its beginning, with the
                # same quote character
                self.entityhelp = (0, self.entityhelp[1])
                self.definition += defpart
                if not self.instring:
                    # the quoted definition is closed: this unit is complete
                    self.inentity = False
                    break

    # Debugging aid, disabled by the constant condition: when enabled it
    # appends a truncated repr of every attribute as a comment.
    if 0:
        for attr in dir(self):
            r = repr(getattr(self, attr))
            if len(r) > 60:
                r = r[:57]+"..."
            self.comments.append(("comment", "self.%s = %s" % (attr, r) ))
    return linesprocessed
330
337
339 """convert the dtd entity back to string form"""
340 lines = []
341 lines.extend([comment for commenttype, comment in self.comments])
342 lines.extend(self.unparsedlines)
343 if self.isnull():
344 result = "".join(lines)
345 return result.rstrip() + "\n"
346
347
348
349
350 if len(self.entity) > 0:
351 if getattr(self, 'entitytype', None) == 'external':
352 entityline = '<!ENTITY % '+self.entity+' '+self.entityparameter+' '+self.definition+'>'
353 else:
354 entityline = '<!ENTITY '+self.entity+' '+self.definition+'>'
355 if getattr(self, 'hashprefix', None):
356 entityline = self.hashprefix + " " + entityline
357 if isinstance(entityline, unicode):
358 entityline = entityline.encode('UTF-8')
359 lines.append(entityline+'\n')
360 return "".join(lines)
361
362 -class dtdfile(base.TranslationStore):
363 """this class represents a .dtd file, made up of dtdunits"""
364 UnitClass = dtdunit
366 """construct a dtdfile, optionally reading in from inputfile"""
367 base.TranslationStore.__init__(self, unitclass = self.UnitClass)
368 self.filename = getattr(inputfile, 'name', '')
369 if inputfile is not None:
370 dtdsrc = inputfile.read()
371 self.parse(dtdsrc)
372 self.makeindex()
373
def parse(self, dtdsrc):
    """Read the source code of a dtd file and add its contents to self.units as dtdunits.

    The source is cut into chunks of lines.  A chunk ends after the first
    line that begins with a quote character, optional whitespace and ">",
    provided a line containing "<!ENTITY" has already been seen in that
    chunk; otherwise it runs to the end of the source.  Each chunk is handed
    to fresh dtdunit objects until a unit reports that it consumed no lines.

    A unit is stored only if it consumed at least one line and holds either
    an entity or unparsed lines.  Exceptions raised while parsing a unit are
    reported through warnings.warn and do not abort the whole file.

    @type dtdsrc: String
    @param dtdsrc: Complete text of the DTD file
    """
    # Same pattern value as before, as a raw string and compiled once
    # instead of on every line.
    closing_re = re.compile(r'''["']\s*>''')
    start = 0
    end = 0
    lines = dtdsrc.split("\n")
    total = len(lines)
    while end < total:
        if start == end:
            end += 1
        foundentity = False
        # The loop condition already guarantees end < total, so the former
        # extra bounds check inside the loop was unreachable and is gone.
        while end < total:
            if '<!ENTITY' in lines[end]:
                foundentity = True
            if foundentity and closing_re.match(lines[end]):
                end += 1
                break
            end += 1

        linesprocessed = 1  # to initialise the loop
        while linesprocessed >= 1:
            newdtd = dtdunit()
            try:
                linesprocessed = newdtd.parse("\n".join(lines[start:end]))
                if linesprocessed >= 1 and (not newdtd.isnull() or newdtd.unparsedlines):
                    self.units.append(newdtd)
            except Exception as e:
                # "as" form is valid on Python 2.6+ as well as Python 3.
                # linesprocessed keeps its previous value here, so start
                # still advances and parsing resumes further down.
                warnings.warn("%s\nError occured between lines %d and %d:\n%s" % (e, start+1, end, "\n".join(lines[start:end])))
            start += linesprocessed
404
406 """convert to a string. double check that unicode is handled somehow here"""
407 source = self.getoutput()
408 if not self._valid_store():
409 warnings.warn("DTD file '%s' does not validate" % self.filename)
410 return None
411 if isinstance(source, unicode):
412 return source.encode(getattr(self, "encoding", "UTF-8"))
413 return source
414
416 """convert the units back to source"""
417 sources = [str(dtd) for dtd in self.units]
418 return "".join(sources)
419
421 """makes self.index dictionary keyed on entities"""
422 self.index = {}
423 for dtd in self.units:
424 if not dtd.isnull():
425 self.index[dtd.entity] = dtd
426
428 """Validate the store to determine if it is valid
429
430 This uses ElementTree to parse the DTD
431
432 @return: If the store passes validation
433 @rtype: Boolean
434 """
435 if etree is not None:
436 try:
437
438 dtd = etree.DTD(StringIO.StringIO(re.sub("#expand", "", self.getoutput())))
439 except etree.DTDParseError:
440 return False
441 return True
442