22 """classes that hold units of comma-separated values (.csv) files (csvunit)
23 or entire files (csvfile) for use with localisation
24 """
25
26 import csv
27 import logging
28 import codecs
29 try:
30 import cStringIO as StringIO
31 except:
32 import StringIO
33
34 from translate.misc import sparse
35 from translate.storage import base


class SimpleDictReader:
    # name assumed; a DictReader-style reader built on the sparse tokenizer

    def __init__(self, fileobj, fieldnames):
        self.fieldnames = fieldnames
        self.contents = fileobj.read()
        self.parser = sparse.SimpleParser(defaulttokenlist=[",", "\n"], whitespacechars="\r")
        self.parser.stringescaping = 0
        self.parser.quotechars = '"'
        self.tokens = self.parser.tokenize(self.contents)
        self.tokenpos = 0

    def __iter__(self):
        return self

    def getvalue(self, value):
        """Return a field value, unquoting quoted tokens.

        Minimal sketch of the original helper: strips the surrounding quotes
        from a quoted token and leaves bare tokens untouched.
        """
        if len(value) > 1 and value[0] == value[-1] and value[0] in ('"', "'"):
            return value[1:-1]
        return value

    def next(self):
        lentokens = len(self.tokens)
        while self.tokenpos < lentokens and self.tokens[self.tokenpos] == "\n":
            self.tokenpos += 1
        if self.tokenpos >= lentokens:
            raise StopIteration()
        thistokens = []
        while self.tokenpos < lentokens and self.tokens[self.tokenpos] != "\n":
            thistokens.append(self.tokens[self.tokenpos])
            self.tokenpos += 1
        while self.tokenpos < lentokens and self.tokens[self.tokenpos] == "\n":
            self.tokenpos += 1
        fields = []
        # glue tokens back into fields, splitting on bare commas
        currentfield = ''
        fieldparts = 0
        for token in thistokens:
            if token == ',':
                # a single-token field may be a quoted string that needs unquoting
                if fieldparts == 1:
                    currentfield = self.getvalue(currentfield)
                fields.append(currentfield)
                currentfield = ''
                fieldparts = 0
            else:
                currentfield += token
                fieldparts += 1
        # flush the final field, which has no trailing comma
        if fieldparts:
            if fieldparts == 1:
                currentfield = self.getvalue(currentfield)
            fields.append(currentfield)
        values = {}
        for fieldnum in range(len(self.fieldnames)):
            if fieldnum >= len(fields):
                values[self.fieldnames[fieldnum]] = ""
            else:
                values[self.fieldnames[fieldnum]] = fields[fieldnum]
        return values


class DefaultDialect(csv.excel):
    # Reconstructed sketch: the exact attribute values are assumptions; the
    # intent is a forgiving comma dialect with backslash escaping.
    skipinitialspace = True
    escapechar = '\\'


csv.register_dialect('default', DefaultDialect)
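# The 'default' dialect registered here is the fallback used by try_dialects()
# and parse() below when the sniffed dialect cannot read the file.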


def from_unicode(text, encoding='utf-8'):
    """Encode a value to a byte string for the csv module.

    Minimal sketch of the helper used by csvunit.todict() below.
    """
    if encoding == 'auto':
        encoding = 'utf-8'
    if isinstance(text, unicode):
        return text.encode(encoding)
    return text or ""


class csvunit(base.TranslationUnit):
    spreadsheetescapes = [("+", "\\+"), ("-", "\\-"), ("=", "\\="), ("'", "\\'")]
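    # these pairs guard cells starting with "+", "-", "=" or "'", which
    # spreadsheet applications would otherwise treat as formulas or prefixes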

    def __init__(self, source=None):
        super(csvunit, self).__init__(source)
        self.location = ""
        self.source = source or ""
        self.target = ""
        self.id = ""
        # fuzzy is kept as text so that it round-trips through the CSV column
        self.fuzzy = 'False'
        self.developer_comments = ""
        self.translator_comments = ""
        self.context = ""

    def getid(self):
        if self.id:
            return self.id

        result = self.source
        context = self.context
        if context:
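            # join context and source with the EOT byte (\x04), the same
            # convention gettext-based stores use for context-qualified ids,
            # e.g. context "menu" + source "File" gives u"menu\x04File"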
            result = u"%s\04%s" % (context, result)

        return result

    def getlocations(self):
        return [self.location]

    def addlocation(self, location):
        self.location = location

    def getcontext(self):
        return self.context

    def setcontext(self, value):
        self.context = value

    def getnotes(self, origin=None):
        if origin is None:
            result = self.translator_comments
            if self.developer_comments:
                if result:
                    result += '\n' + self.developer_comments
                else:
                    result = self.developer_comments
            return result
        elif origin == "translator":
            return self.translator_comments
        elif origin in ('programmer', 'developer', 'source code'):
            return self.developer_comments
        else:
            raise ValueError("Comment type not valid")

    def addnote(self, text, origin=None, position="append"):
        if origin in ('programmer', 'developer', 'source code'):
            if position == 'append' and self.developer_comments:
                self.developer_comments += '\n' + text
            elif position == 'prepend' and self.developer_comments:
                self.developer_comments = text + '\n' + self.developer_comments
            else:
                self.developer_comments = text
        else:
            if position == 'append' and self.translator_comments:
                self.translator_comments += '\n' + text
            elif position == 'prepend' and self.translator_comments:
                self.translator_comments = text + '\n' + self.translator_comments
            else:
                self.translator_comments = text

    def removenotes(self):
        self.translator_comments = u''

    def isfuzzy(self):
        if self.fuzzy.lower() in ('1', 'x', 'true', 'yes', 'fuzzy'):
            return True
        return False

    def markfuzzy(self, value=True):
        if value:
            self.fuzzy = 'True'
        else:
            self.fuzzy = 'False'

208 """see if unit might be a header"""
        some_value = False
        for key, value in self.todict().iteritems():
            if value:
                some_value = True
            if key.lower() != 'fuzzy' and value and key.lower() != value.lower():
                return False
        return some_value

    def fromdict(self, cedict, encoding='utf-8'):
        # Minimal sketch of the original body: copy recognised columns onto
        # the unit, mapping alternative header names to the canonical ones
        # and decoding byte strings as read by the csv module.
        for key, value in cedict.iteritems():
            fieldname = fieldname_map.get(key, key)
            if fieldname not in canonical_field_names or value is None:
                continue
            if isinstance(value, str):
                value = value.decode(encoding)
            setattr(self, fieldname, value)

    def todict(self, encoding='utf-8'):
        # return a dict of byte strings keyed by the canonical field names,
        # ready to be fed to csv.DictWriter
        source = self.source
        target = self.target
        output = {
            'location': from_unicode(self.location, encoding),
            'source': from_unicode(source, encoding),
            'target': from_unicode(target, encoding),
            'id': from_unicode(self.id, encoding),
            'fuzzy': str(self.fuzzy),
            'context': from_unicode(self.context, encoding),
            'translator_comments': from_unicode(self.translator_comments, encoding),
            'developer_comments': from_unicode(self.developer_comments, encoding),
        }

        return output


canonical_field_names = ('location', 'source', 'target', 'id', 'fuzzy', 'context', 'translator_comments', 'developer_comments')
fieldname_map = {
    'original': 'source',
    'untranslated': 'source',
    'translated': 'target',
    'translation': 'target',
    'identifier': 'id',
    'key': 'id',
    'label': 'id',
    'translator comments': 'translator_comments',
    'notes': 'translator_comments',
    'developer comments': 'developer_comments',
    'state': 'fuzzy',
}
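# e.g. a header row of "location, original, translation" is read with the
# canonical location/source/target column names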


def try_dialects(inputfile, fieldnames, dialect):
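    # build a DictReader, falling back from the sniffed dialect to the
    # 'default' dialect registered above and finally to the csv module's
    # built-in 'excel' dialect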
    try:
        inputfile.seek(0)
        reader = csv.DictReader(inputfile, fieldnames=fieldnames, dialect=dialect)
    except csv.Error:
        try:
            inputfile.seek(0)
            reader = csv.DictReader(inputfile, fieldnames=fieldnames, dialect='default')
        except csv.Error:
            inputfile.seek(0)
            reader = csv.DictReader(inputfile, fieldnames=fieldnames, dialect='excel')
    return reader


def valid_fieldnames(fieldnames):
    """check if fieldnames are valid: at least one column must map to 'source'"""
    for fieldname in fieldnames:
        if fieldname in canonical_field_names and fieldname == 'source':
            return True
        elif fieldname in fieldname_map and fieldname_map[fieldname] == 'source':
            return True
    return False


def detect_header(sample, dialect, fieldnames):
    """Test if the first row is a header row.

    Returns the detected header if it holds valid fieldnames, otherwise the
    default fieldnames cut to the number of columns found (minimum three).
    """
    inputfile = StringIO.StringIO(sample)
    try:
        reader = csv.reader(inputfile, dialect)
    except csv.Error:
        try:
            inputfile.seek(0)
            reader = csv.reader(inputfile, 'default')
        except csv.Error:
            inputfile.seek(0)
            reader = csv.reader(inputfile, 'excel')

    header = reader.next()
    columncount = max(len(header), 3)
    if valid_fieldnames(header):
        return header
    return fieldnames[:columncount]


class csvfile(base.TranslationStore):
    """This class represents a .csv file with one translation unit per row.
    The default format contains three columns: location, source, target"""
    UnitClass = csvunit
    Name = _("Comma Separated Value")
    Mimetypes = ['text/comma-separated-values', 'text/csv']
    Extensions = ["csv"]

    def __init__(self, inputfile=None, fieldnames=None, encoding="auto"):
        base.TranslationStore.__init__(self, unitclass=self.UnitClass)
        self.units = []
        self.encoding = encoding or 'utf-8'
        if not fieldnames:
            self.fieldnames = ['location', 'source', 'target', 'id', 'fuzzy', 'context', 'translator_comments', 'developer_comments']
        else:
            if isinstance(fieldnames, basestring):
                fieldnames = [fieldname.strip() for fieldname in fieldnames.split(",")]
            self.fieldnames = fieldnames
        self.filename = getattr(inputfile, 'name', '')
        self.dialect = 'default'
        if inputfile is not None:
            csvsrc = inputfile.read()
            inputfile.close()
            self.parse(csvsrc)

    def parse(self, csvsrc):
        text, encoding = self.detect_encoding(csvsrc, default_encodings=['utf-8', 'utf-16'])
        # recode the input to utf-8 and drop any byte order mark
        if encoding and encoding.lower() != 'utf-8':
            csvsrc = text.encode('utf-8').lstrip(codecs.BOM_UTF8)
        self.encoding = encoding or 'utf-8'

        sniffer = csv.Sniffer()
        # the sniffer and detect_header() work on a byte sample of the input
        sample = csvsrc[:1024]
        if isinstance(sample, unicode):
            sample = sample.encode('utf-8')

        try:
            self.dialect = sniffer.sniff(sample)
            if not self.dialect.escapechar:
                self.dialect.escapechar = '\\'
            if self.dialect.quoting == csv.QUOTE_MINIMAL:
                # QUOTE_MINIMAL usually just means the sniffer could not tell,
                # so quote everything to be safe
                self.dialect.quoting = csv.QUOTE_ALL
                self.dialect.doublequote = True
        except csv.Error:
            self.dialect = 'default'

        try:
            fieldnames = detect_header(sample, self.dialect, self.fieldnames)
            self.fieldnames = fieldnames
        except csv.Error:
            pass

        inputfile = StringIO.StringIO(csvsrc)
        reader = try_dialects(inputfile, self.fieldnames, self.dialect)

        first_row = True
        for row in reader:
            newce = self.UnitClass()
            newce.fromdict(row)
            # drop the first row if it is just the column header
            if not first_row or not newce.match_header():
                self.addunit(newce)
            first_row = False

    def __str__(self):
        # signature assumed; renders the whole store back to CSV text
        outputfile = StringIO.StringIO()
        writer = csv.DictWriter(outputfile, self.fieldnames, extrasaction='ignore', dialect=self.dialect)
        # write the header row: each field name maps to itself
        hdict = dict((fieldname, fieldname) for fieldname in self.fieldnames)
        writer.writerow(hdict)
        for ce in self.units:
            cedict = ce.todict()
            writer.writerow(cedict)
        return outputfile.getvalue()
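

if __name__ == "__main__":
    # Illustrative usage only, not part of the storage API: build a small CSV
    # in memory, parse it, inspect the units and write it back out.  This
    # relies on the default column layout and the __str__ method defined above.
    sample_csv = ("location,source,target\n"
                  "intro.title,Hello,Hallo\n"
                  'intro.body,"Hello, world","Hallo, wereld"\n')
    store = csvfile(StringIO.StringIO(sample_csv))
    for unit in store.units:
        print "%s -> %s" % (unit.source, unit.target)
    print str(store)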