Package translate :: Package filters :: Module decoration
[hide private]
[frames | no frames]

Source Code for Module translate.filters.decoration

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2004-2008 Zuza Software Foundation 
  5  # 
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21   
 22  """functions to get decorative/informative text out of strings...""" 
 23   
 24  import re 
 25  import unicodedata 
 26   
 27  from translate.lang import data 
 28   
 29   
def spacestart(str1):
    """Return the run of whitespace characters that begins the string.

    @param str1: The string to inspect
    @return: The leading whitespace, or an empty string if str1 does not
    start with whitespace
    """
    leading = []
    for char in str1:
        if not char.isspace():
            break
        leading.append(char)
    return u"".join(leading)
39 40
def spaceend(str1):
    """Return the run of whitespace characters that ends the string.

    @param str1: The string to inspect
    @return: The trailing whitespace in its original order, or an empty
    string if str1 does not end with whitespace
    """
    trailing = []
    # Walk backwards from the last character until a non-space is met.
    for char in reversed(str1):
        if not char.isspace():
            break
        trailing.append(char)
    # Characters were collected back to front; restore the original order.
    trailing.reverse()
    return u"".join(trailing)
51 52
def puncstart(str1, punctuation):
    """Return the punctuation (and interleaved whitespace) that begins the string.

    @param str1: The string to inspect
    @param punctuation: The characters that count as punctuation
    @return: The leading run of characters that are either in punctuation
    or are whitespace
    """
    prefix = []
    for char in str1:
        if char not in punctuation and not char.isspace():
            break
        prefix.append(char)
    return u"".join(prefix)
62 63
def puncend(str1, punctuation):
    """Return the punctuation (and interleaved whitespace) that ends the string.

    @param str1: The string to inspect
    @param punctuation: The characters that count as punctuation
    @return: The trailing run of characters that are either in punctuation
    or are whitespace, with every no-break space (U+00A0) replaced by a
    plain space
    """
    # A regular-expression implementation was measured as slightly slower.
    suffix = []
    for char in reversed(str1):
        if char not in punctuation and not char.isspace():
            break
        suffix.append(char)
    # Characters were collected back to front; restore the original order.
    suffix.reverse()
    return u"".join(suffix).replace(u"\u00a0", u" ")
76 77
def ispurepunctuation(str1):
    """Check whether the string contains no alphanumeric characters.

    @param str1: The string to inspect
    @return: False if any character is alphanumeric, otherwise the length
    of str1 (so an empty string yields 0, which is also falsy)
    """
    if any(char.isalnum() for char in str1):
        return False
    return len(str1)
84 85
def isvalidaccelerator(accelerator, acceptlist=None):
    """Decide whether the given accelerator character is acceptable.

    @type accelerator: character
    @param accelerator: A character to be checked for accelerator validity
    @type acceptlist: String
    @param acceptlist: A list of characters that are permissible as
    accelerators; when None, the built-in legacy rules are applied instead
    @rtype: Boolean
    @return: True if the supplied character is an acceptable accelerator
    """
    assert isinstance(accelerator, unicode)
    assert isinstance(acceptlist, unicode) or acceptlist is None
    if not accelerator:
        return False

    if acceptlist is not None:
        # An explicit accept list is authoritative once normalized.
        return accelerator in data.normalize(acceptlist)

    # Old code path - ensures that we don't get a large number of regressions
    candidate = accelerator.replace("_", "")
    # Note that an accelerator made only of underscores is emptied by the
    # replace above, and the empty string passes this membership test.
    if candidate in u"-?":
        return True
    if not candidate.isalnum():
        return False

    # We don't want accelerators on characters with diacritics, so look at
    # whether the character decomposes into several code points.
    decomposed = unicodedata.decomposition(candidate)
    # Strip out any extra information such as <compat> tags.
    decomposed = re.sub("<[^>]+>", "", decomposed).strip()
    return " " not in decomposed
119 120
def findaccelerators(str1, accelmarker, acceptlist=None):
    """Locate every accelerator in str1 that is introduced by accelmarker.

    The accelerator is taken to be the single character that follows each
    occurrence of the marker. A marker that is the very last thing in the
    string has no accelerator and ends the search.

    @param str1: The string to search
    @param accelmarker: The string that marks the following character as an
    accelerator
    @param acceptlist: Passed through to isvalidaccelerator
    @return: A pair of lists (accelerators, badaccelerators), each holding
    (marker position, accelerator character) tuples, split according to
    isvalidaccelerator
    """
    valid = []
    invalid = []
    markerlength = len(accelmarker)
    markerpos = str1.find(accelmarker, 0)
    while markerpos >= 0:
        # we assume accelerators are single characters
        keystart = markerpos + markerlength
        keyend = keystart + 1
        if keyend > len(str1):
            break
        key = str1[keystart:keyend]
        if isvalidaccelerator(key, acceptlist):
            valid.append((markerpos, key))
        else:
            invalid.append((markerpos, key))
        # Resume the search after the accelerator character itself.
        markerpos = str1.find(accelmarker, keyend)
    return valid, invalid
142 143
def findmarkedvariables(str1, startmarker, endmarker, ignorelist=None):
    """Return the variables in str1 delimited by the given markers, with positions.

    @param str1: The string to search
    @param startmarker: The string that introduces a variable
    @param endmarker: How a variable ends: a string that terminates it, an
    integer giving a fixed variable length (usually 1), or None to end the
    variable at the first character that is neither alphanumeric nor "_"
    @param ignorelist: Variable names to leave out of the result (optional)
    @rtype: List
    @return: (position, variable) tuples, where position is the index of the
    start marker and variable is the text found after it. A variable is
    only reported if it is empty or is alphanumeric once "_" and "." are
    removed.
    """
    if ignorelist is None:
        ignorelist = ()
    variables = []
    currentpos = 0
    while True:
        variable = None
        startmatch = str1.find(startmarker, currentpos)
        if startmatch < 0:
            break
        currentpos = startmatch + len(startmarker)
        if endmarker is None:
            # No end marker: the variable runs up to the first character
            # that is not alphanumeric or "_", or to the end of the string
            # when there is no such character.
            endmatch = len(str1)
            for n in range(currentpos, len(str1)):
                if not (str1[n].isalnum() or str1[n] == '_'):
                    endmatch = n
                    break
            # A marker directly followed by a terminator (or by the end of
            # the string) names no variable; scanning simply resumes after
            # the marker instead of discarding the rest of the string.
            if currentpos < endmatch:
                variable = str1[currentpos:endmatch]
            currentpos = endmatch
        elif isinstance(endmarker, int):
            # An int end marker means a fixed-length variable string
            # (usually endmarker == 1).
            endmatch = currentpos + endmarker
            if endmatch > len(str1):
                break
            variable = str1[currentpos:endmatch]
            currentpos = endmatch
        else:
            endmatch = str1.find(endmarker, currentpos)
            if endmatch == -1:
                break
            # Search backwards in case there is an intervening start marker;
            # if so the variable begins at the innermost one.
            inner = str1.rfind(startmarker, currentpos, endmatch)
            if inner != -1:
                startmatch = inner
                currentpos = inner + len(startmarker)
            variable = str1[currentpos:endmatch]
            currentpos = endmatch + len(endmarker)
        if variable is not None and variable not in ignorelist:
            if not variable or variable.replace("_", "").replace(".", "").isalnum():
                variables.append((startmatch, variable))
    return variables
191 192
def getaccelerators(accelmarker, acceptlist=None):
    """Build a function that lists the accelerators marked with accelmarker.

    @param accelmarker: The string that marks the following character as an
    accelerator
    @param acceptlist: Passed through to findaccelerators
    @return: A function taking a string and returning a pair of lists
    (accelerators, badaccelerators) of accelerator characters
    """

    def getmarkedaccelerators(str1):
        """Return the accelerator characters in str1, without their positions."""
        valid, invalid = findaccelerators(str1, accelmarker, acceptlist)
        # Drop the positions and keep only the accelerator characters.
        return [found[1] for found in valid], [found[1] for found in invalid]

    return getmarkedaccelerators
def getvariables(startmarker, endmarker):
    """Build a function that lists the variables marked with the given markers.

    @param startmarker: Passed through to findmarkedvariables
    @param endmarker: Passed through to findmarkedvariables
    @return: A function taking a string and returning the list of variable
    names found in it
    """

    def getmarkedvariables(str1):
        """Return the variable names in str1, without their positions."""
        located = findmarkedvariables(str1, startmarker, endmarker)
        return [found[1] for found in located]

    return getmarkedvariables
def getnumbers(str1):
    """Return the numbers that occur in the string, in order of appearance.

    A number starts at a digit and may continue with digits, periods and
    degree signs. Periods are only kept when another digit follows them, so
    a sentence-ending period is not part of the number.

    @param str1: The unicode string to search
    @return: A list of the number strings found
    """
    # TODO: handle locale-based periods e.g. 2,5 for Afrikaans
    assert isinstance(str1, unicode)
    found = []
    degree = u'\xb0'
    insidenumber = False
    current = ""
    # Periods seen inside a number that are not yet known to be followed
    # by a digit.
    pendingperiods = ""
    for char in str1:
        if char.isdigit():
            insidenumber = True
            current += pendingperiods + char
            pendingperiods = ""
        elif insidenumber and char == degree:
            current += char
        elif insidenumber and char == '.':
            pendingperiods += char
        else:
            if insidenumber:
                # Any other character terminates the number in progress.
                insidenumber = False
                if current:
                    found.append(current)
                current = ""
            pendingperiods = ""
    if insidenumber and current:
        found.append(current)
    return found
# Matches one or more adjacent function references such as:
# pam_*_item() IO::String NULL() POE::Component::Client::LDAP->new()
# POE::Wheel::Null mechanize.UserAgent POSIX::sigaction() window.resizeBy()
# @fptr()
# The pattern shouldn't be locale aware.
_function_re = re.compile(r'''((?:
    [\w\.]+              # function or module name - any alpha-numeric character, _, or .
    (?:(?:::|->|\.)\w+)* # (optional) C++ style Class::Method() syntax or pointer->Method() or module.function()
    \(\)                 # Must close with ()
)+)
''', re.VERBOSE)


def getfunctions(str1):
    """Return the function references, such as name(), found in the string.

    Only text ending in an empty pair of parentheses is matched, so
    punctuation that follows the parentheses is not part of the result.

    @param str1: The string to search
    @return: A list of the matched function references
    """
    # Cheap substring test first: without "()" the pattern cannot match.
    if u"()" not in str1:
        return []
    return _function_re.findall(str1)
265 266
def getemails(str1):
    """Return the email addresses found in the string.

    An address is any run of word characters, periods and hyphens on both
    sides of an "@" sign.

    @param str1: The string to search
    @return: A list of the matched addresses
    """
    # Raw string: the pattern's backslashes are regex escapes, not string
    # escapes, so they must not be interpreted by the string literal.
    return re.findall(r'[\w\.\-]+@[\w\.\-]+', str1)
270 271
def geturls(str1):
    """Return the URIs found in the string.

    Recognizes text starting with "http:", "https:", "www." or "ftp:"
    followed by characters from a fixed set of URL characters.

    @param str1: The string to search
    @return: A list of the matched URIs
    """
    # Raw strings: the backslashes are regex escapes, not string escapes.
    urlpattern = (
        r'https?:[\w/\.:;+\-~\%#\$?=&,()]+|'
        r'www\.[\w/\.:;+\-~\%#\$?=&,()]+|'
        r'ftp:[\w/\.:;+\-~\%#?=&,]+'
    )
    return re.findall(urlpattern, str1)
277 278
def countaccelerators(accelmarker, acceptlist=None):
    """Build a function that counts the accelerators marked with accelmarker.

    @param accelmarker: The string that marks the following character as an
    accelerator
    @param acceptlist: Passed through to findaccelerators
    @return: A function taking a string and returning a pair
    (number of valid accelerators, number of invalid accelerators)
    """

    def countmarkedaccelerators(str1):
        """Return how many valid and invalid accelerators str1 contains."""
        valid, invalid = findaccelerators(str1, accelmarker, acceptlist)
        return len(valid), len(invalid)

    return countmarkedaccelerators