Package translate :: Package tools :: Module pocount
[hide private]
[frames] | no frames]

Source Code for Module translate.tools.pocount

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2003-2009 Zuza Software Foundation 
  5  # 
  6  # This file is part of the Translate Toolkit. 
  7  # 
  8  # This program is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # This program is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program; if not, see <http://www.gnu.org/licenses/>. 
 20   
 21  """Create string and word counts for supported localization files including: 
 22  XLIFF, TMX, Gettext PO and MO, Qt .ts and .qm, Wordfast TM, etc 
 23   
 24  See: http://translate.sourceforge.net/wiki/toolkit/pocount for examples and 
 25  usage instructions 
 26  """ 
 27   
 28  from optparse import OptionParser 
 29  import os 
 30  import sys 
 31   
 32  from translate.storage import factory 
 33  from translate.storage import statsdb 
 34   
 35  # define style constants 
 36  style_full, style_csv, style_short_strings, style_short_words = range(4) 
 37   
 38  # default output style 
 39  default_style = style_full 
 40   
 41   
def calcstats_old(filename):
    """Previous implementation of calcstats(), kept for comparison and
    debugging purposes.

    @param filename: file handed to factory.getobject()
    @return: dictionary of unit counts and word counts; an empty dictionary
        (after printing the error) when factory.getobject() raises ValueError
    """
    try:
        store = factory.getobject(filename)
    except ValueError:
        print(str(sys.exc_info()[1]))
        return {}

    # header units never take part in the statistics
    units = [unit for unit in store.units if not unit.isheader()]
    translated = translatedmessages(units)
    fuzzy = fuzzymessages(units)
    review = [unit for unit in units if unit.isreview()]
    untranslated = untranslatedmessages(units)

    # Per-unit result of statsdb.wordsinunit(); below, index 0 is summed as
    # the source word count and index 1 as the target word count.
    wordcounts = dict((unit, statsdb.wordsinunit(unit)) for unit in units)

    def sourcewords(elementlist):
        return sum(wordcounts[unit][0] for unit in elementlist)

    def targetwords(elementlist):
        return sum(wordcounts[unit][1] for unit in elementlist)

    stats = {
        # units
        "translated": len(translated),
        "fuzzy": len(fuzzy),
        "untranslated": len(untranslated),
        "review": len(review),
        # words
        "translatedsourcewords": sourcewords(translated),
        "translatedtargetwords": targetwords(translated),
        "fuzzysourcewords": sourcewords(fuzzy),
        "untranslatedsourcewords": sourcewords(untranslated),
        "reviewsourcewords": sourcewords(review),
    }
    # review units are reported separately and are not part of the totals
    stats["total"] = (stats["translated"] + stats["fuzzy"] +
                      stats["untranslated"])
    stats["totalsourcewords"] = (stats["translatedsourcewords"] +
                                 stats["fuzzysourcewords"] +
                                 stats["untranslatedsourcewords"])
    return stats
def calcstats(filename):
    """Return the totals that statsdb.StatsCache.filetotals() reports for
    filename, requested with extended=True.

    @param filename: file to collect statistics for
    """
    return statsdb.StatsCache().filetotals(filename, extended=True)
def summarize(title, stats, style=style_full, indent=8, incomplete_only=False):
    """
    Print summary for a .po file in specified format.

    @param title: name of .po file
    @param stats: dictionary with translation statistics for the file
        specified; the keys read here are "total", "translated", "fuzzy",
        "untranslated", "review", the matching "...sourcewords" entries,
        "translatedtargetwords" and, optionally, "extended"
    @param style: one of the style_* constants; any value other than
        style_csv, style_short_strings or style_short_words selects the
        full format
    @param indent: indentation of the 2nd column (length of longest filename)
    @param incomplete_only: omit fully translated files
    @type incomplete_only: Boolean
    @rtype: int
    @return: 1 if counting incomplete files (incomplete_only=True) and the
        file is completely translated, 0 otherwise
    """

    def percent(denominator, devisor):
        """Whole-number percentage of denominator in devisor; 0 when the
        devisor is 0."""
        if devisor == 0:
            return 0
        # explicit floor division: same result for the integer counts used
        # here, independent of the interpreter's "/" semantics
        return denominator * 100 // devisor

    if incomplete_only and (stats["total"] == stats["translated"]):
        return 1

    if style == style_csv:
        # The fields are joined with a single space, which is exactly the
        # separator the former chain of trailing-comma print statements
        # produced between its pieces.
        fields = [
            "%s, " % title,
            "%d, %d, %d," % (stats["translated"],
                             stats["translatedsourcewords"],
                             stats["translatedtargetwords"]),
            "%d, %d," % (stats["fuzzy"], stats["fuzzysourcewords"]),
            "%d, %d," % (stats["untranslated"],
                         stats["untranslatedsourcewords"]),
            "%d, %d" % (stats["total"], stats["totalsourcewords"]),
        ]
        if stats["review"] > 0:
            # the key used to be misspelled ("reviewsourdcewords"), which
            # raised KeyError for every file containing review units
            fields.append(", %d, %d" % (stats["review"],
                                        stats["reviewsourcewords"]))
        print(" ".join(fields))
    elif style == style_short_strings:
        spaces = " " * (indent - len(title))
        print("%s%s strings: total: %d\t| %dt\t%df\t%du\t| %d%%t\t%d%%f\t%d%%u" % (
            title, spaces,
            stats["total"], stats["translated"], stats["fuzzy"],
            stats["untranslated"],
            percent(stats["translated"], stats["total"]),
            percent(stats["fuzzy"], stats["total"]),
            percent(stats["untranslated"], stats["total"])))
    elif style == style_short_words:
        spaces = " " * (indent - len(title))
        print("%s%s source words: total: %d\t| %dt\t%df\t%du\t| %d%%t\t%d%%f\t%d%%u" % (
            title, spaces,
            stats["totalsourcewords"], stats["translatedsourcewords"],
            stats["fuzzysourcewords"], stats["untranslatedsourcewords"],
            percent(stats["translatedsourcewords"], stats["totalsourcewords"]),
            percent(stats["fuzzysourcewords"], stats["totalsourcewords"]),
            percent(stats["untranslatedsourcewords"],
                    stats["totalsourcewords"])))
    else:  # style == style_full
        print(title)
        print("type strings words (source) words (translation)")
        print("translated: %5d (%3d%%) %10d (%3d%%) %15d" % (
            stats["translated"],
            percent(stats["translated"], stats["total"]),
            stats["translatedsourcewords"],
            percent(stats["translatedsourcewords"], stats["totalsourcewords"]),
            stats["translatedtargetwords"]))
        print("fuzzy: %5d (%3d%%) %10d (%3d%%) n/a" % (
            stats["fuzzy"],
            percent(stats["fuzzy"], stats["total"]),
            stats["fuzzysourcewords"],
            percent(stats["fuzzysourcewords"], stats["totalsourcewords"])))
        print("untranslated: %5d (%3d%%) %10d (%3d%%) n/a" % (
            stats["untranslated"],
            percent(stats["untranslated"], stats["total"]),
            stats["untranslatedsourcewords"],
            percent(stats["untranslatedsourcewords"],
                    stats["totalsourcewords"])))
        print("Total: %5d %17d %22d" % (
            stats["total"],
            stats["totalsourcewords"],
            stats["translatedtargetwords"]))
        if "extended" in stats:
            print("")
            for state, e_stats in stats["extended"].items():
                print("%s: %5d (%3d%%) %10d (%3d%%) %15d" % (
                    state, e_stats["units"],
                    percent(e_stats["units"], stats["total"]),
                    e_stats["sourcewords"],
                    percent(e_stats["sourcewords"], stats["totalsourcewords"]),
                    e_stats["targetwords"]))

        if stats["review"] > 0:
            print("review: %5d %17d n/a" % (
                stats["review"], stats["reviewsourcewords"]))
        # blank line separating this file's table from the next one
        print("")
    return 0
def fuzzymessages(units):
    """Return the list of units that are fuzzy and have a non-empty target.

    @param units: iterable of units providing isfuzzy() and a target
        attribute
    @return: a new list, in the original order
    """
    # A comprehension always produces a real list, so callers can safely
    # take len() of the result.
    return [unit for unit in units if unit.isfuzzy() and unit.target]
def translatedmessages(units):
    """Return the list of units whose istranslated() is true.

    @param units: iterable of units providing istranslated()
    @return: a new list, in the original order
    """
    # A comprehension always produces a real list, so callers can safely
    # take len() of the result.
    return [unit for unit in units if unit.istranslated()]
def untranslatedmessages(units):
    """Return the list of units that are neither translated nor fuzzy and
    have a non-empty source.

    @param units: iterable of units providing istranslated(), isfuzzy() and
        a source attribute
    @return: a new list, in the original order
    """
    # A comprehension always produces a real list, so callers can safely
    # take len() of the result.
    return [unit for unit in units
            if not (unit.istranslated() or unit.isfuzzy()) and unit.source]
class summarizer:
    """Print statistics for a list of files and directories.

    All the work happens in the constructor: every existing path in
    filenames is processed (directories recursively), each file is reported
    through summarize(), and the running totals are kept in self.totals.
    """

    def __init__(self, filenames, style=default_style, incomplete_only=False):
        """
        @param filenames: paths of files and/or directories to process
        @param style: one of the style_* constants
        @param incomplete_only: passed on to summarize() for every file
        """
        self.totals = {}
        self.filecount = 0
        self.longestfilename = 0
        self.style = style
        self.incomplete_only = incomplete_only
        self.complete_count = 0

        if self.style == style_csv:
            print("Filename, Translated Messages, Translated Source Words, "
                  "Translated Target Words, Fuzzy Messages, "
                  "Fuzzy Source Words, Untranslated Messages, "
                  "Untranslated Source Words, Total Message, "
                  "Total Source Words, Review Messages, Review Source Words")
        if self.style in (style_short_strings, style_short_words):
            # the short styles pad every title to the longest name given on
            # the command line
            for filename in filenames:
                if len(filename) > self.longestfilename:
                    self.longestfilename = len(filename)
        for filename in filenames:
            if not os.path.exists(filename):
                sys.stderr.write("cannot process %s: does not exist\n"
                                 % filename)
                continue
            elif os.path.isdir(filename):
                self.handledir(filename)
            else:
                self.handlefile(filename)
        if self.filecount > 1 and (self.style == style_full):
            if self.incomplete_only:
                summarize("TOTAL (incomplete only):", self.totals,
                          incomplete_only=True)
                print("File count (incomplete): %5d"
                      % (self.filecount - self.complete_count))
            else:
                summarize("TOTAL:", self.totals, incomplete_only=False)
            print("File count: %5d" % (self.filecount))
            print("")

    def updatetotals(self, stats):
        """Update self.totals with the statistics in stats."""
        for key in stats.keys():
            if key == "extended":
                #FIXME: calculate extended totals
                continue
            self.totals[key] = self.totals.get(key, 0) + stats[key]

    def handlefile(self, filename):
        """Collect, accumulate and print the statistics of one file.

        Any error raised while doing so is reported on stderr and the file
        is left out of self.filecount.
        """
        try:
            stats = calcstats(filename)
            self.updatetotals(stats)
            self.complete_count += summarize(filename, stats, self.style,
                                             self.longestfilename,
                                             self.incomplete_only)
            self.filecount += 1
        except Exception:  # This happens if we have a broken file.
            # "except Exception" rather than a bare except, so that
            # KeyboardInterrupt and SystemExit still stop the run
            sys.stderr.write("%s\n" % (sys.exc_info()[1],))

    def handlefiles(self, dirname, filenames):
        """Process the entries filenames found inside directory dirname."""
        for filename in filenames:
            pathname = os.path.join(dirname, filename)
            if os.path.isdir(pathname):
                self.handledir(pathname)
            else:
                self.handlefile(pathname)

    def handledir(self, dirname):
        """Process every entry of dirname, skipping version-control
        metadata directories."""
        name = os.path.basename(dirname)
        if name in ["CVS", ".svn", "_darcs", ".git", ".hg", ".bzr"]:
            return
        entries = os.listdir(dirname)
        self.handlefiles(dirname, entries)
def main():
    """Command-line entry point: parse the options and run summarizer over
    the files and directories given as arguments.

    The output-format options are mutually exclusive; combining them ends
    the program through parser.error().
    """
    parser = OptionParser(usage="usage: %prog [options] po-files")
    parser.add_option("--incomplete", action="store_true", default=False,
                      dest="incomplete_only",
                      help="skip 100% translated files.")
    # options controlling output format: each one is a plain boolean flag,
    # the style constant is chosen from the flags further down
    parser.add_option("--full", action="store_true",
                      dest="style_full",
                      help="(default) statistics in full, verbose format")
    parser.add_option("--csv", action="store_true",
                      dest="style_csv",
                      help="statistics in CSV format")
    parser.add_option("--short", action="store_true",
                      dest="style_short_strings",
                      help="same as --short-strings")
    parser.add_option("--short-strings", action="store_true",
                      dest="style_short_strings",
                      help="statistics of strings in short format - one line per file")
    parser.add_option("--short-words", action="store_true",
                      dest="style_short_words",
                      help="statistics of words in short format - one line per file")

    (options, args) = parser.parse_args()

    requested = [flag for flag in (options.style_full,
                                   options.style_csv,
                                   options.style_short_strings,
                                   options.style_short_words) if flag]
    if len(requested) > 1:
        # parser.error() prints the message and exits, so nothing after it
        # is needed
        parser.error("options --full, --csv, --short-strings and --short-words are mutually exclusive")

    if options.style_csv:
        style = style_csv
    elif options.style_full:
        style = style_full
    elif options.style_short_strings:
        style = style_short_strings
    elif options.style_short_words:
        style = style_short_words
    else:
        style = default_style  # default output style

    # psyco is an optional accelerator; carry on without it on any failure
    try:
        import psyco
        psyco.full()
    except Exception:
        pass

    summarizer(args, style, options.incomplete_only)
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()