Package translate :: Package convert :: Module html2po
[hide private]
[frames | no frames]

Source Code for Module translate.convert.html2po

 1  #!/usr/bin/env python 
 2  # -*- coding: utf-8 -*- 
 3  # 
 4  # Copyright 2004-2006 Zuza Software Foundation 
 5  # 
 6  # This file is part of translate. 
 7  # 
 8  # translate is free software; you can redistribute it and/or modify 
 9  # it under the terms of the GNU General Public License as published by 
10  # the Free Software Foundation; either version 2 of the License, or 
11  # (at your option) any later version. 
12  # 
13  # translate is distributed in the hope that it will be useful, 
14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
16  # GNU General Public License for more details. 
17  # 
18  # You should have received a copy of the GNU General Public License 
19  # along with translate; if not, write to the Free Software 
20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
21  # 
22   
23  """convert HTML files to Gettext PO localization files 
24   
25  See: http://translate.sourceforge.net/wiki/toolkit/html2po for examples and 
26  usage instructions 
27  """ 
28   
29  from translate.storage import po 
30  from translate.storage import html 
31   
32   
class html2po:
    """Converter that turns the units of an HTML file into a PO store."""

    def convertfile(self, inputfile, filename, includeuntagged=False,
                    duplicatestyle="msgctxt", keepcomments=False):
        """Build and return a ``po.pofile`` from the units of an HTML file.

        ``inputfile`` is handed to ``html.htmlfile`` for parsing, with
        ``includeuntagged`` forwarded as its ``includeuntaggeddata`` option.
        ``filename`` is accepted but not used by this method.

        Every parsed unit becomes a PO unit carrying the same source text and
        locations.  When ``keepcomments`` is true, the unit's notes are also
        attached as "developer" notes.  Duplicate entries are resolved using
        ``duplicatestyle`` before the store is returned.
        """
        postore = po.pofile()
        parsed = html.htmlfile(includeuntaggeddata=includeuntagged,
                               inputfile=inputfile)
        for unit in parsed.units:
            entry = postore.addsourceunit(unit.source)
            entry.addlocations(unit.getlocations())
            if not keepcomments:
                continue
            # Carry the unit's notes over only when explicitly requested.
            entry.addnote(unit.getnotes(), "developer")
        postore.removeduplicates(duplicatestyle)
        return postore
48 49
def converthtml(inputfile, outputfile, templates, includeuntagged=False,
                pot=False, duplicatestyle="msgctxt", keepcomments=False):
    """Convert the HTML read from ``inputfile`` to PO and write it out.

    A fresh ``html2po`` convertor parses ``inputfile``; the resulting store
    is converted with ``str()`` and written to ``outputfile`` in one call.

    ``inputfile``       -- source passed on to ``html2po.convertfile``; its
                           ``name`` attribute (or "unknown" when absent) is
                           passed along as the filename.
    ``outputfile``      -- object with a ``write`` method that receives the
                           PO text.
    ``templates``       -- accepted but not used by this function.
    ``includeuntagged`` -- forwarded to ``convertfile``.
    ``pot``             -- accepted but not used by this function.
    ``duplicatestyle``  -- forwarded to ``convertfile``.
    ``keepcomments``    -- forwarded to ``convertfile``.

    Always returns 1 (presumably the success flag expected by the convert
    framework -- confirm against ``translate.convert.convert``).
    """
    convertor = html2po()
    # The output position used to be queried via outputfile.tell() here, but
    # the value was never read; dropping it means outputfile only needs a
    # write() method.
    outputstore = convertor.convertfile(
        inputfile,
        getattr(inputfile, "name", "unknown"),
        includeuntagged,
        duplicatestyle=duplicatestyle,
        keepcomments=keepcomments)
    outputfile.write(str(outputstore))
    return 1
63 64
def main(argv=None):
    """Command-line entry point: set up the option parser and run it.

    ``argv`` is passed straight to the parser's ``run`` method.
    """
    from translate.convert import convert
    from translate.misc import stdiotell
    import sys

    # Replace stdout with the StdIOWrapper from translate.misc.stdiotell.
    sys.stdout = stdiotell.StdIOWrapper(sys.stdout)

    # Every recognised input extension (and the None entry) maps to the
    # same PO output handler.
    handler = ("po", converthtml)
    formats = dict.fromkeys(("html", "htm", "xhtml", None), handler)

    parser = convert.ConvertOptionParser(
        formats,
        usepots=True,
        description=__doc__)

    parser.add_option(
        "-u", "--untagged",
        dest="includeuntagged",
        action="store_true",
        default=False,
        help="include untagged sections")
    parser.passthrough.append("includeuntagged")

    parser.add_option(
        "--keepcomments",
        dest="keepcomments",
        action="store_true",
        default=False,
        help="preserve html comments as translation notes in the output")
    parser.passthrough.append("keepcomments")

    parser.add_duplicates_option()
    parser.passthrough.append("pot")

    parser.run(argv)
# Run the converter only when this module is executed as a script,
# not when it is imported.
if __name__ == '__main__':
    main()