1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 """Module for parsing Qt .qm files
24
25 @note: based on documentation from Gettext's .qm implementation
26 (see write-qt.c) and on observation of the output of lrelease.
27 @note: Certain deprecated section tags are not implemented. These will break
28 and print out the missing tag. They are easy to implement and should follow
29 the structure in 03 (Translation). We could find no examples that use these
30 so we'd rather leave it unimplemented until we actually have test data.
31 @note: Many .qm files are unable to be parsed as they do not have the source
32 text. We assume that since they use a hash table to lookup the data there is
33 actually no need for the source text. It seems however that in Qt4's lrelease
34 all data is included in the resultant .qm file.
35 @todo: We can only parse, not create, a .qm file. The main issue is that we
36 need to implement the hashing algorithm (which seems to be identical to the
37 Gettext hash algorithm). Unlike Gettext it seems that the hash is required,
38 but that has not been validated.
39 @todo: The code can parse files correctly. But it could be cleaned up to be
40 more readable, especially the part that breaks the file into sections.
41
42 U{http://qt.gitorious.org/+kde-developers/qt/kde-qt/blobs/master/tools/linguist/shared/qm.cpp}
43 U{Plural information<http://qt.gitorious.org/+kde-developers/qt/kde-qt/blobs/master/tools/linguist/shared/numerus.cpp>}
44 U{QLocale languages<http://docs.huihoo.com/qt/4.5/qlocale.html#Language-enum>}
45 """
46
47 import codecs
48 import struct
49 import sys
50
51 from translate.misc.multistring import multistring
52 from translate.storage import base
53
# The 16 byte signature that opens every .qm file, as four big-endian unsigned
# 32-bit words (the layout struct.unpack(">4L", ...) returns).  Written without
# the Python-2-only ``L`` suffix: the values are identical on Python 2, and the
# literals are also valid on Python 3.
QM_MAGIC_NUMBER = (0x3CB86418, 0xCAEF9C95, 0xCD211CBF, 0x60A1BDDD)
55
56
"""Helper to unpack Qt .qm files into a Python string"""
# .qm files are binary: open in binary mode so that neither newline
# translation nor text decoding can alter the bytes, and let the context
# manager close the handle even when the read fails.
with open(file_, "rb") as f:
    s = f.read()
# Emit every byte as a \xNN escape followed by a newline.  bytearray() yields
# integer byte values on both Python 2 and Python 3.
sys.stdout.write("\\x%02x" * len(s) % tuple(bytearray(s)) + "\n")
63
64
# Translation unit type for .qm stores, derived from base.TranslationUnit;
# the qmfile store class in this module names it as its UnitClass.
65 -class qmunit(base.TranslationUnit):
66 """A class representing a .qm translation message."""
67
70
71
# Store class for Qt .qm files, derived from base.TranslationStore.  The
# class attributes below describe the format.
72 -class qmfile(base.TranslationStore):
73 """A class representing a .qm file."""
# Unit class associated with this store.
74 UnitClass = qmunit
# Display name of the format.  NOTE(review): `_` is not defined in the visible
# code -- presumably a gettext-style translation function installed
# elsewhere; confirm.
75 Name = _("Qt .qm file")
# MIME type and file extension (without the dot) listed for this format.
# NOTE(review): presumably consulted by format detection in the base
# framework -- confirm.
76 Mimetypes = ["application/x-qm"]
77 Extensions = ["qm"]
# Marks the format as binary; how the base class uses this flag is not
# visible here.
78 _binary = True
79
87
89 """Output a string representation of the .qm data file"""
# Always returns the empty string: nothing is serialised here.  The module
# docstring's @todo notes that .qm files can only be parsed, not created.
90 return ""
91
def parse(self, input):
    """Parse the given file or file source string and add a unit per message.

    ``input`` is either the raw contents of a .qm file or an object with a
    ``read()`` method, which is read completely and then closed.  When
    ``input`` has a ``name`` attribute it is stored as ``self.filename``;
    otherwise ``self.filename`` is set to ``''`` unless it already holds a
    non-empty value.

    After the 16 byte magic number the data is a sequence of sections, each
    introduced by a 5 byte header (1 byte type, 4 byte big-endian length).
    Only the messages section (type 0x69) is decoded.  The hash (0x42),
    contexts (0x2f) and numerus rules (0x88) sections are checked for
    completeness and skipped; any other section type is reported on stdout
    and skipped.  Every completed message is added with
    ``self.addsourceunit(source)`` and its ``target`` is then assigned.

    A file without a messages section yields no units.  Parsing stops
    quietly, after a note on stderr, at the first message tag that is not
    implemented (for example 0x02 "SourceText16" or 0x04 "Context16").

    Raises:
        ValueError: the data is shorter than the magic number, the magic
            number does not match, or a message ends without both a source
            text and a translation.
        struct.error: a section header, a known section or a message field
            is truncated.
    """
    # NOTE(review): the ``def`` line was missing from the listing this body
    # was recovered from; the name and signature are reconstructed from the
    # docstring and the body's use of ``self`` and ``input`` -- confirm.
    if hasattr(input, 'name'):
        self.filename = input.name
    elif not getattr(self, 'filename', ''):
        self.filename = ''
    if hasattr(input, "read"):
        qmsrc = input.read()
        input.close()
        input = qmsrc
    if len(input) < 16:
        raise ValueError("This is not a .qm file: file empty or too small")
    magic = struct.unpack(">4L", input[:16])
    if magic != QM_MAGIC_NUMBER:
        raise ValueError("This is not a .qm file: invalid magic number")

    sectionheader = 5
    # Hashes, Messages, Contexts, NumerusRules.
    known_sections = (0x42, 0x69, 0x2f, 0x88)
    messages_start = None
    messages_length = 0

    # Pass 1: walk the section table and remember where the messages live.
    startsection = 16
    while startsection < len(input):
        section_type, length = struct.unpack(
            ">BL", input[startsection:startsection + sectionheader])
        data_start = startsection + sectionheader
        if section_type in known_sections:
            # Same completeness check the per-byte unpack used to provide,
            # without building a tuple holding every byte of the section.
            if data_start + length > len(input):
                raise struct.error(
                    "section %#x is truncated" % section_type)
            if section_type == 0x69:
                messages_start = data_start
                messages_length = length
        else:
            sys.stdout.write(
                "Section: %s (type: %#x, offset: %#x, length: %d)\n"
                % ("Unkown", section_type, startsection, length))
        startsection = data_start + length

    if messages_start is None:
        # No messages section: nothing to add.  This used to fail with an
        # unbound local variable instead.
        return

    # Pass 2: decode the tagged fields of each message.
    pos = messages_start
    messages_end = messages_start + messages_length
    source = target = None
    while pos < messages_end:
        subsection, = struct.unpack(">B", input[pos:pos + 1])
        pos += 1
        if subsection == 0x01:
            # End of message: commit the collected source/target pair.
            if source is None or target is None:
                raise ValueError("Old .qm format with no source defined")
            newunit = self.addsourceunit(source)
            newunit.target = target
            source = target = None
            continue

        # Every other tag is followed by a 4 byte big-endian signed value:
        # the payload length, except for the hash tag where it is the hash.
        length, = struct.unpack(">l", input[pos:pos + 4])
        if subsection == 0x03:
            # Translation, stored as UTF-16 big-endian; -1 marks "no text".
            if length != -1:
                # The unpack doubles as a check that the payload is complete.
                raw, = struct.unpack(">%ds" % length,
                                     input[pos + 4:pos + 4 + length])
                string = codecs.utf_16_be_decode(raw)[0]
                if target:
                    # Further translations of one message are plural forms.
                    target.strings.append(string)
                else:
                    target = multistring(string)
                pos += 4 + length
            else:
                target = ""
                pos += 4
        elif subsection == 0x06:
            # Source text.
            source = input[pos + 4:pos + 4 + length].decode('iso-8859-1')
            pos += 4 + length
        elif subsection in (0x07, 0x08):
            # Context and comment: skipped, they are not stored on the unit.
            pos += 4 + length
        elif subsection == 0x05:
            # Hash: a bare 4 byte value with no length prefix.
            pos += 4
        else:
            unimplemented = {0x02: "SourceText16", 0x04: "Context16"}
            sys.stderr.write(
                "Unimplemented: %s %s\n"
                % (subsection, unimplemented.get(subsection, "Unkown")))
            return
196
# Writing is not supported, so this always raises.  The module docstring's
# @todo names the unimplemented hashing algorithm as the main obstacle.
198 raise Exception("Writing of .qm files is not supported yet")
199