1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """Module to provide a cache of statistics in a database.
23
24 @organization: Zuza Software Foundation
25 @copyright: 2007 Zuza Software Foundation
26 @license: U{GPL <http://www.fsf.org/licensing/licenses/gpl.html>}
27 """
28
29 from UserDict import UserDict
30
31 from translate import __version__ as toolkitversion
32 from translate.storage import factory
33 from translate.misc.multistring import multistring
34 from translate.lang.common import Common
35
36 try:
37 from sqlite3 import dbapi2
38 except ImportError:
39 from pysqlite2 import dbapi2
40 import os.path
41 import re
42 import sys
43 import stat
44 import thread
45
46 kdepluralre = re.compile("^_n: ")
47 brtagre = re.compile("<br\s*?/?>")
48 xmltagre = re.compile("<[^>]+>")
49 numberre = re.compile("\\D\\.\\D")
50
51 state_strings = {0: "untranslated", 1: "translated", 2: "fuzzy"}
62
64 """Counts the words in the unit's source and target, taking plurals into
65 account. The target words are only counted if the unit is translated."""
66 (sourcewords, targetwords) = (0, 0)
67 if isinstance(unit.source, multistring):
68 sourcestrings = unit.source.strings
69 else:
70 sourcestrings = [unit.source or ""]
71 for s in sourcestrings:
72 sourcewords += wordcount(s)
73 if not unit.istranslated():
74 return sourcewords, targetwords
75 if isinstance(unit.target, multistring):
76 targetstrings = unit.target.strings
77 else:
78 targetstrings = [unit.target or ""]
79 for s in targetstrings:
80 targetwords += wordcount(s)
81 return sourcewords, targetwords
82
84 - def __init__(self, record_keys, record_values=None, compute_derived_values = lambda x: x):
91
93 return tuple(self[key] for key in self.record_keys)
94
101
108
111
113 """Modifies f to commit database changes if it executes without exceptions.
114 Otherwise it rolls back the database.
115
116 ALL publicly accessible methods in StatsCache MUST be decorated with this
117 decorator.
118 """
119
def decorated_f(self, *args, **kwargs):
    """Call the wrapped function f and finalise the database transaction.

    The connection is committed after f returns; the value f produced is
    then handed back to the caller. If f or the commit raises, the
    connection (when one is set) is rolled back and the exception is
    re-raised unchanged.
    """
    try:
        outcome = f(self, *args, **kwargs)
        self.con.commit()
    except:
        # Deliberately a bare except: whatever went wrong, the pending
        # changes are undone before the error propagates.
        if self.con:
            self.con.rollback()
        raise
    else:
        return outcome
132 return decorated_f
133
134 UNTRANSLATED, TRANSLATED, FUZZY = 0, 1, 2
136 """Returns the numeric database state for the unit."""
137 if unit.istranslated():
138 return TRANSLATED
139 if unit.isfuzzy() and unit.target:
140 return FUZZY
141 return UNTRANSLATED
142
144 keys = ['translatedsourcewords',
145 'fuzzysourcewords',
146 'untranslatedsourcewords',
147 'translated',
148 'fuzzy',
149 'untranslated',
150 'translatedtargetwords']
151
154
156 self.cur = cur
157 self.cur.execute("""
158 CREATE TABLE IF NOT EXISTS filetotals(
159 fileid INTEGER PRIMARY KEY AUTOINCREMENT,
160 translatedsourcewords INTEGER NOT NULL,
161 fuzzysourcewords INTEGER NOT NULL,
162 untranslatedsourcewords INTEGER NOT NULL,
163 translated INTEGER NOT NULL,
164 fuzzy INTEGER NOT NULL,
165 untranslated INTEGER NOT NULL,
166 translatedtargetwords INTEGER NOT NULL);""")
167
def new_record(cls, state_for_db=None, sourcewords=None, targetwords=None):
    """Build a totals record, optionally seeded with one unit's statistics.

    state_for_db is one of UNTRANSLATED, TRANSLATED or FUZZY, or None to
    get a record with nothing filled in. sourcewords and targetwords are
    the word counts to book under the matching state; targetwords is only
    stored for translated units.

    Returns a Record built over cls.keys whose derived values are computed
    by cls._compute_derived_values.
    """
    record = Record(cls.keys, compute_derived_values=cls._compute_derived_values)
    # Compare states by value, not identity: "is" on integers only works
    # by accident of interpreter-level integer caching and breaks for
    # equal values that are distinct objects. None matches no state, so
    # the record is returned untouched in that case.
    if state_for_db == UNTRANSLATED:
        record['untranslated'] = 1
        record['untranslatedsourcewords'] = sourcewords
    elif state_for_db == TRANSLATED:
        record['translated'] = 1
        record['translatedsourcewords'] = sourcewords
        record['translatedtargetwords'] = targetwords
    elif state_for_db == FUZZY:
        record['fuzzy'] = 1
        record['fuzzysourcewords'] = sourcewords
    return record

new_record = classmethod(new_record)
184
186 record["total"] = record["untranslated"] + \
187 record["translated"] + \
188 record["fuzzy"]
189 record["totalsourcewords"] = record["untranslatedsourcewords"] + \
190 record["translatedsourcewords"] + \
191 record["fuzzysourcewords"]
192 record["review"] = 0
193 _compute_derived_values = classmethod(_compute_derived_values)
194
201
203 self.cur.execute("""
204 INSERT OR REPLACE into filetotals
205 VALUES (%(fileid)d, %(vals)s);
206 """ % {'fileid': fileid, 'vals': record.as_string_for_db()})
207
209 self.cur.execute("""
210 DELETE FROM filetotals
211 WHERE fileid=?;
212 """, (fileid,))
213
215 """Returns a dictionary with all statistics initialised to 0."""
216 return FileTotals.new_record()
217
220
222 return {"total": [], "translated": [], "fuzzy": [], "untranslated": []}
223
225 return {"sourcewordcount": [], "targetwordcount": []}
226
234 file_stat = os.stat(file_path)
235 assert not stat.S_ISDIR(file_stat.st_mode)
236 return file_stat.st_mtime, file_stat.st_size
237
239 return os.path.extsep + 'pending'
240
243
246 """An object instantiated as a singleton for each statsfile that provides
247 access to the database cache from a pool of StatsCache objects."""
248 _caches = {}
249 defaultfile = None
250 con = None
251 """This cache's connection"""
252 cur = None
253 """The current cursor"""
254
256 current_thread = thread.get_ident()
def make_database(statsfile):
    """Create, register and return a new cache object for statsfile.

    The new object is stored in cls._caches under the current thread and
    the stats file path, connected to the database, and has its tables
    created. A database written by an older toolkit build (or one whose
    files table is empty) is deleted and recreated first.
    """
    def open_connection(cache):
        # (Re)attach a connection and a cursor to the cache object.
        cache.con = dbapi2.connect(statsfile)
        cache.cur = cache.con.cursor()

    def discard_outdated(cache):
        # Returns True when the database file was removed because it is
        # outdated, False when it was kept (or has no files table yet).
        try:
            cache.cur.execute("""SELECT toolkitbuild FROM files""")
            row = cache.cur.fetchone()
            outdated = row is None or row[0] < toolkitversion.build
            if not outdated:
                return False
            cache.con.close()
            del cache
            os.unlink(statsfile)
            return True
        except dbapi2.OperationalError:
            return False

    cache = object.__new__(cls)
    cls._caches.setdefault(current_thread, {})[statsfile] = cache
    open_connection(cache)
    if discard_outdated(cache):
        # The old file is gone; connect again to start a fresh database.
        open_connection(cache)
    cache.create()
    return cache
283
284 if not statsfile:
285 if not cls.defaultfile:
286 userdir = os.path.expanduser("~")
287 cachedir = None
288 if os.name == "nt":
289 cachedir = os.path.join(userdir, "Translate Toolkit")
290 else:
291 cachedir = os.path.join(userdir, ".translate_toolkit")
292 if not os.path.exists(cachedir):
293 os.mkdir(cachedir)
294 cls.defaultfile = os.path.realpath(os.path.join(cachedir, "stats.db"))
295 statsfile = cls.defaultfile
296 else:
297 statsfile = os.path.realpath(statsfile)
298
299 if current_thread in cls._caches and statsfile in cls._caches[current_thread]:
300 return cls._caches[current_thread][statsfile]
301
302 return make_database(statsfile)
303
304 @transaction
306 """Create all tables and indexes."""
307 self.file_totals = FileTotals(self.cur)
308
309 self.cur.execute("""CREATE TABLE IF NOT EXISTS files(
310 fileid INTEGER PRIMARY KEY AUTOINCREMENT,
311 path VARCHAR NOT NULL UNIQUE,
312 st_mtime INTEGER NOT NULL,
313 st_size INTEGER NOT NULL,
314 toolkitbuild INTEGER NOT NULL);""")
315
316 self.cur.execute("""CREATE UNIQUE INDEX IF NOT EXISTS filepathindex
317 ON files (path);""")
318
319 self.cur.execute("""CREATE TABLE IF NOT EXISTS units(
320 id INTEGER PRIMARY KEY AUTOINCREMENT,
321 unitid VARCHAR NOT NULL,
322 fileid INTEGER NOT NULL,
323 unitindex INTEGER NOT NULL,
324 source VARCHAR NOT NULL,
325 target VARCHAR,
326 state INTEGER,
327 sourcewords INTEGER,
328 targetwords INTEGER);""")
329
330 self.cur.execute("""CREATE INDEX IF NOT EXISTS fileidindex
331 ON units(fileid);""")
332
333 self.cur.execute("""CREATE TABLE IF NOT EXISTS checkerconfigs(
334 configid INTEGER PRIMARY KEY AUTOINCREMENT,
335 config VARCHAR);""")
336
337 self.cur.execute("""CREATE INDEX IF NOT EXISTS configindex
338 ON checkerconfigs(config);""")
339
340 self.cur.execute("""CREATE TABLE IF NOT EXISTS uniterrors(
341 errorid INTEGER PRIMARY KEY AUTOINCREMENT,
342 unitindex INTEGER NOT NULL,
343 fileid INTEGER NOT NULL,
344 configid INTEGER NOT NULL,
345 name VARCHAR NOT NULL,
346 message VARCHAR);""")
347
348 self.cur.execute("""CREATE INDEX IF NOT EXISTS uniterrorindex
349 ON uniterrors(fileid, configid);""")
350
@transaction
def _getfileid(self, filename, check_mod_info=True, store=None):
    """Return the fileid that represents the given file in the stats cache.

    The file is looked up by its real path. When it is already cached:

      - with check_mod_info false, the stored modification time and size
        are overwritten with the current ones and the existing fileid is
        returned;
      - with check_mod_info true, the existing fileid is returned only if
        the stored modification time and size match the current ones.

    In every other case the statistics are (re)calculated through
    _cachestore() and the fileid it returns is passed on.

    The optional store argument avoids unnecessary reparsing of an already
    loaded translation file. It can be a TranslationFile object or a
    callback that returns one; without it the file is loaded from disk.
    """
    resolved = os.path.realpath(filename)
    self.cur.execute("""SELECT fileid, st_mtime, st_size FROM files
        WHERE path=?;""", (resolved,))
    row = self.cur.fetchone()
    current_info = get_mod_info(resolved)

    if row:
        known_id, cached_mtime, cached_size = row[0], row[1], row[2]
        if not check_mod_info:
            # The caller vouches for the cached data; just refresh the
            # recorded modification info.
            self.cur.execute("""UPDATE files
                SET st_mtime=?, st_size=?
                WHERE fileid=?;""", (current_info[0], current_info[1], known_id))
            return known_id
        if (cached_mtime, cached_size) == current_info:
            return known_id

    # Not cached, or the cached data is out of date: get hold of a store.
    if callable(store):
        store = store()
    elif not store:
        store = factory.getobject(resolved)

    return self._cachestore(store, resolved, current_info)
386
388 """See if this checker configuration has been used before."""
389 config = str(checker.config.__dict__)
390 self.cur.execute("""SELECT configid, config FROM checkerconfigs WHERE
391 config=?;""", (config,))
392 configrow = self.cur.fetchone()
393 if not configrow or configrow[1] != config:
394 return None
395 else:
396 return configrow[0]
397
398 @transaction
400 """Cache the statistics for the supplied unit(s)."""
401 unitvalues = []
402 for index, unit in enumerate(units):
403 if unit.istranslatable():
404 sourcewords, targetwords = wordsinunit(unit)
405 if unitindex:
406 index = unitindex
407
408 unitvalues.append((unit.getid(), fileid, index, \
409 unit.source, unit.target, \
410 sourcewords, targetwords, \
411 statefordb(unit)))
412 file_totals_record = file_totals_record + FileTotals.new_record(statefordb(unit), sourcewords, targetwords)
413
414 self.cur.executemany("""INSERT INTO units
415 (unitid, fileid, unitindex, source, target, sourcewords, targetwords, state)
416 values (?, ?, ?, ?, ?, ?, ?, ?);""",
417 unitvalues)
418 self.file_totals[fileid] = file_totals_record
419 if unitindex:
420 return state_strings[statefordb(units[0])]
421 return ""
422
423 @transaction
425 """Calculates and caches the statistics of the given store
426 unconditionally."""
427 self.cur.execute("""DELETE FROM files WHERE
428 path=?;""", (realpath,))
429 self.cur.execute("""INSERT INTO files
430 (fileid, path, st_mtime, st_size, toolkitbuild) values (NULL, ?, ?, ?, ?);""",
431 (realpath, mod_info[0], mod_info[1], toolkitversion.build))
432 fileid = self.cur.lastrowid
433 self.cur.execute("""DELETE FROM units WHERE
434 fileid=?""", (fileid,))
435 self._cacheunitstats(store.units, fileid)
436 return fileid
437
439 """Retrieves the statistics for the given file if possible, otherwise
440 delegates to cachestore()."""
441 return self.file_totals[self._getfileid(filename, store=store)]
442
443 @transaction
445 """Helper method for cachestorechecks() and recacheunit()"""
446
447
448 dummy = (-1, fileid, configid, "noerror", "")
449 unitvalues = [dummy]
450
451 errornames = []
452 for index, unit in enumerate(units):
453 if unit.istranslatable():
454
455 if unitindex:
456 index = unitindex
457 failures = checker.run_filters(unit)
458 for checkname, checkmessage in failures.iteritems():
459 unitvalues.append((index, fileid, configid, checkname, checkmessage))
460 errornames.append("check-" + checkname)
461 checker.setsuggestionstore(None)
462
463 if unitindex:
464
465
466 unitvalues.remove(dummy)
467 errornames.append("total")
468
469
470 self.cur.executemany("""INSERT INTO uniterrors
471 (unitindex, fileid, configid, name, message)
472 values (?, ?, ?, ?, ?);""",
473 unitvalues)
474 return errornames
475
476 @transaction
478 """Calculates and caches the error statistics of the given store
479 unconditionally."""
480
481
482 self.cur.execute("""DELETE FROM uniterrors WHERE
483 fileid=?;""", (fileid,))
484 self._cacheunitschecks(store.units, fileid, configid, checker)
485 return fileid
486
488 values = self.cur.execute("""
489 SELECT state, sourcewords, targetwords
490 FROM units
491 WHERE fileid=? AND unitid=?
492 """, (fileid, unitid))
493 result = values.fetchone()
494 if result is not None:
495 return result
496 else:
497 print >> sys.stderr, """WARNING: Database in inconsistent state.
498 fileid %d and unitid %s have no entries in the table units.""" % (fileid, unitid)
499
500
501
502 return []
503
504 @transaction
506 """Recalculate all information for a specific unit. This is necessary
507 for updating all statistics when a translation of a unit took place,
508 for example.
509
510 This method assumes that everything was up to date before (file totals,
511 checks, checker config, etc.)."""
512 fileid = self._getfileid(filename, check_mod_info=False)
513 configid = self._get_config_id(fileid, checker)
514 unitid = unit.getid()
515
516 totals_without_unit = self.file_totals[fileid] - \
517 FileTotals.new_record(*self.get_unit_stats(fileid, unitid))
518 self.cur.execute("""SELECT unitindex FROM units WHERE
519 fileid=? AND unitid=?;""", (fileid, unitid))
520 unitindex = self.cur.fetchone()[0]
521 self.cur.execute("""DELETE FROM units WHERE
522 fileid=? AND unitid=?;""", (fileid, unitid))
523 state = [self._cacheunitstats([unit], fileid, unitindex, totals_without_unit)]
524
525 self.cur.execute("""DELETE FROM uniterrors WHERE
526 fileid=? AND unitindex=?;""", (fileid, unitindex))
527 if os.path.exists(suggestion_filename(filename)):
528 checker.setsuggestionstore(factory.getobject(suggestion_filename(filename), ignore=suggestion_extension()))
529 state.extend(self._cacheunitschecks([unit], fileid, configid, checker, unitindex))
530 return state
531
532 - def _checkerrors(self, filename, fileid, configid, checker, store):
533 def geterrors():
534 self.cur.execute("""SELECT
535 name,
536 unitindex
537 FROM uniterrors WHERE fileid=? and configid=?
538 ORDER BY unitindex;""", (fileid, configid))
539 return self.cur.fetchone(), self.cur
540
541 first, cur = geterrors()
542 if first is not None:
543 return first, cur
544
545
546
547 if callable(store):
548 store = store()
549 else:
550 store = store or factory.getobject(filename)
551
552 if os.path.exists(suggestion_filename(filename)):
553 checker.setsuggestionstore(factory.getobject(suggestion_filename(filename), ignore=suggestion_extension()))
554 self._cachestorechecks(fileid, store, checker, configid)
555 return geterrors()
556
557 - def _geterrors(self, filename, fileid, configid, checker, store):
558 result = []
559 first, cur = self._checkerrors(filename, fileid, configid, checker, store)
560 result.append(first)
561 result.extend(cur.fetchall())
562 return result
563
564 @transaction
566 configid = self._getstoredcheckerconfig(checker)
567 if configid:
568 return configid
569 self.cur.execute("""INSERT INTO checkerconfigs
570 (configid, config) values (NULL, ?);""",
571 (str(checker.config.__dict__),))
572 return self.cur.lastrowid
573
def filechecks(self, filename, checker, store=None):
    """Return the check failures recorded for the given file.

    The result starts out as emptyfilechecks() and gains one
    'check-<name>' entry per failing check, each listing the indices of
    the units that failed it. Results come from the cache when available;
    otherwise they are calculated first (see _checkerrors()).
    """
    fileid = self._getfileid(filename, store=store)
    configid = self._get_config_id(fileid, checker)
    rows = self._geterrors(filename, fileid, configid, checker, store)

    errors = emptyfilechecks()
    for row in rows:
        unitindex = row[1]
        if unitindex == -1:
            # Index -1 is a placeholder row, not a real failure.
            continue
        errors.setdefault('check-' + row[0], []).append(unitindex)

    return errors
591
593 fileid = self._getfileid(filename)
594 configid = self._get_config_id(fileid, checker)
595 self._checkerrors(filename, fileid, configid, checker, None)
596 self.cur.execute("""SELECT
597 name,
598 unitindex
599 FROM uniterrors
600 WHERE fileid=? and configid=? and name=?;""", (fileid, configid, name))
601 return self.cur.fetchone() is not None
602
604 """Return a dictionary of unit stats mapping sets of unit
605 indices with those states"""
606 stats = emptyfilestats()
607 fileid = self._getfileid(filename, store=store)
608
609 self.cur.execute("""SELECT
610 state,
611 unitindex
612 FROM units WHERE fileid=?
613 ORDER BY unitindex;""", (fileid,))
614 values = self.cur.fetchall()
615
616 for value in values:
617 stats[state_strings[value[0]]].append(value[1])
618 stats["total"].append(value[1])
619
620 return stats
621
622 - def filestats(self, filename, checker, store=None):
629
def unitstats(self, filename, _lang=None, store=None):
    """Return per-unit word counts for the given file.

    The result maps property names ("sourcewordcount" and
    "targetwordcount") to lists holding one value per cached unit, in
    unitindex order.

    Note that this differs from filestats: filestats supplies collections
    of unit indices that share a property, whereas the lists returned here
    hold the property value of each unit.

    _lang is accepted but not used by this method.
    """
    stats = emptyunitstats()
    fileid = self._getfileid(filename, store=store)

    self.cur.execute("""SELECT
        sourcewords, targetwords
        FROM units WHERE fileid=?
        ORDER BY unitindex;""", (fileid,))
    rows = self.cur.fetchall()

    stats["sourcewordcount"].extend(row[0] for row in rows)
    stats["targetwordcount"].extend(row[1] for row in rows)
    return stats
655