1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
"""Functions to get decorative/informative text out of strings."""
23
import re
import unicodedata

from translate.lang import data
28
29
def spacestart(str1):
    """Return all the whitespace from the start of the string.

    An empty string is returned when str1 is empty or does not begin with
    whitespace; a string consisting only of whitespace is returned whole.
    """
    # lstrip() removes exactly the leading whitespace, so the difference in
    # length is the number of leading characters to keep.
    return str1[:len(str1) - len(str1.lstrip())]
39
40
def spaceend(str1):
    """Return all the whitespace from the end of the string.

    An empty string is returned when str1 is empty or does not end with
    whitespace; a string consisting only of whitespace is returned whole.
    """
    # Whatever rstrip() leaves behind is the non-whitespace prefix; the
    # remainder of the string is the trailing whitespace.
    return str1[len(str1.rstrip()):]
51
52
def puncstart(str1, punctuation):
    """Return all the punctuation from the start of the string.

    Whitespace is treated like punctuation, so the result is the longest
    prefix of str1 whose characters are all either whitespace or members of
    ``punctuation``.
    """
    for pos, c in enumerate(str1):
        if c not in punctuation and not c.isspace():
            # First "real" character found: everything before it is the
            # punctuation/whitespace prefix.
            return str1[:pos]
    # The whole string (possibly empty) is punctuation and/or whitespace.
    return str1
62
63
def puncend(str1, punctuation):
    """Return all the punctuation from the end of the string.

    Whitespace is treated like punctuation, so the result is the longest
    suffix of str1 whose characters are all either whitespace or members of
    ``punctuation``.  Any non-breaking space (U+00A0) in that suffix is
    replaced by a plain space in the returned value.
    """
    start = len(str1)
    # Walk backwards until the first character that is neither punctuation
    # nor whitespace; ``start`` ends up at the beginning of the suffix.
    for c in reversed(str1):
        if c not in punctuation and not c.isspace():
            break
        start -= 1
    return str1[start:].replace(u"\u00a0", u" ")
76
77
def ispurepunctuation(str1):
    """Check whether the string is entirely punctuation.

    Returns False as soon as any alphanumeric character is present.
    Otherwise the length of str1 is returned, which is truthy for a
    non-empty string and 0 (falsy) for the empty string.
    """
    if any(c.isalnum() for c in str1):
        return False
    # The length, not a bool, is returned; callers rely on its truthiness.
    return len(str1)
84
85
def isvalidaccelerator(accelerator, acceptlist=None):
    """Return whether the given accelerator character is valid.

    @type accelerator: character
    @param accelerator: A character to be checked for accelerator validity
    @type acceptlist: String
    @param acceptlist: A list of characters that are permissible as
        accelerators.  When None, a built-in heuristic is used instead.
    @rtype: Boolean
    @return: True if the supplied character is an acceptable accelerator
    """
    # ``unicode`` only exists on Python 2; on Python 3 the text type is str.
    try:
        text_type = unicode  # noqa: F821
    except NameError:
        text_type = str
    assert isinstance(accelerator, text_type)
    assert isinstance(acceptlist, text_type) or acceptlist is None

    if not accelerator:
        return False

    if acceptlist is not None:
        # Explicit whitelist: membership in the normalized list decides.
        return accelerator in data.normalize(acceptlist)

    # Heuristic path used when no accept list is supplied.
    # NOTE: an underscore becomes the empty string here, and the empty
    # string is "in" any string, so u"_" is accepted by the next test.
    accelerator = accelerator.replace("_", "")
    if accelerator in u"-?":
        return True
    if not accelerator.isalnum():
        return False

    # Reject characters that decompose into more than one code point
    # (for example a base letter plus a combining mark).
    decomposition = unicodedata.decomposition(accelerator)
    # Drop compatibility tags such as <super> before counting code points.
    decomposition = re.sub("<[^>]+>", "", decomposition).strip()
    return decomposition.count(" ") == 0
119
120
def findaccelerators(str1, accelmarker, acceptlist=None):
    """Return all the accelerators and locations in str1 marked with a given marker.

    The single character following each occurrence of ``accelmarker`` is
    taken as the accelerator and classified with isvalidaccelerator().

    Returns a pair ``(accelerators, badaccelerators)``; each is a list of
    ``(position_of_marker, accelerator_character)`` tuples.  A marker that is
    the last thing in the string has no accelerator and is not reported.
    """
    accelerators = []
    badaccelerators = []
    markerlen = len(accelmarker)
    accelstart = str1.find(accelmarker)
    while accelstart >= 0:
        accelpos = accelstart + markerlen
        if accelpos >= len(str1):
            # Marker at the very end: no character left to be an accelerator.
            break
        # Accelerators are assumed to be single characters.
        accelerator = str1[accelpos]
        if isvalidaccelerator(accelerator, acceptlist):
            accelerators.append((accelstart, accelerator))
        else:
            badaccelerators.append((accelstart, accelerator))
        # Resume the search after the accelerator character itself.
        accelstart = str1.find(accelmarker, accelpos + 1)
    return accelerators, badaccelerators
142
143
def findmarkedvariables(str1, startmarker, endmarker, ignorelist=None):
    """Return all the variables and locations in str1 marked with a given marker.

    ``endmarker`` selects how the end of a variable is found:

      - None: the variable runs up to the first character that is neither
        alphanumeric nor an underscore (or to the end of the string).
      - an int: the variable is exactly that many characters long.
      - anything else: treated as a closing marker string to search for.

    Variables listed in ``ignorelist`` are skipped.  A variable is only
    reported when it is empty or, once underscores and dots are removed,
    purely alphanumeric.

    Returns a list of ``(position_of_startmarker, variable)`` tuples.
    """
    if ignorelist is None:
        ignorelist = ()
    variables = []
    currentpos = 0
    while True:
        currentpos = str1.find(startmarker, currentpos)
        if currentpos < 0:
            break
        variable = None
        startmatch = currentpos
        currentpos += len(startmarker)
        if endmarker is None:
            # No end marker: any character that is not alphanumeric or "_"
            # terminates the variable.
            endmatch = currentpos
            for n in range(currentpos, len(str1)):
                if not (str1[n].isalnum() or str1[n] == '_'):
                    endmatch = n
                    break
            # NOTE(review): this is true both when no terminator was found
            # and when the marker is immediately followed by a terminator;
            # in the latter case the rest of the string is consumed and any
            # later markers are not examined.  Kept as-is for compatibility.
            if currentpos == endmatch:
                endmatch = len(str1)
            if currentpos < endmatch:
                variable = str1[currentpos:endmatch]
            currentpos = endmatch
        elif isinstance(endmarker, int):
            # An int end marker means a fixed-length variable.
            endmatch = currentpos + endmarker
            if endmatch > len(str1):
                break
            variable = str1[currentpos:endmatch]
            currentpos = endmatch
        else:
            endmatch = str1.find(endmarker, currentpos)
            if endmatch == -1:
                break
            # Look backwards for an intervening start marker; if there is
            # one, the variable starts after the last such marker instead.
            start2 = str1.rfind(startmarker, currentpos, endmatch)
            if start2 != -1:
                startmatch2 = start2
                start2 += len(startmarker)
                if start2 != currentpos:
                    currentpos = start2
                    startmatch = startmatch2
            variable = str1[currentpos:endmatch]
            currentpos = endmatch + len(endmarker)
        if variable is None or variable in ignorelist:
            continue
        if not variable or variable.replace("_", "").replace(".", "").isalnum():
            variables.append((startmatch, variable))
    return variables
191
192
def getaccelerators(accelmarker, acceptlist=None):
    """Return a function that gets a list of accelerators marked using accelmarker.

    The returned function takes a string and returns a pair
    ``(accelerators, badaccelerators)`` of character lists, as classified by
    findaccelerators() with the given ``accelmarker`` and ``acceptlist``.
    """

    def getmarkedaccelerators(str1):
        """Return all the accelerators in str1 marked with a given marker."""
        acclocs, badlocs = findaccelerators(str1, accelmarker, acceptlist)
        # Drop the positions; only the accelerator characters are wanted.
        accelerators = [accelerator for _, accelerator in acclocs]
        badaccelerators = [accelerator for _, accelerator in badlocs]
        return accelerators, badaccelerators

    return getmarkedaccelerators
203
204
def getvariables(startmarker, endmarker):
    """Return a function that gets a list of variables marked using startmarker and endmarker.

    The returned function takes a string and returns the list of variable
    names found in it by findmarkedvariables(), without their positions.
    """

    def getmarkedvariables(str1):
        """Return all the variables in str1 marked with a given marker."""
        varlocs = findmarkedvariables(str1, startmarker, endmarker)
        # Drop the positions; only the variable names are wanted.
        return [variable for _, variable in varlocs]

    return getmarkedvariables
214
215
def getnumbers(str1):
    """Return any numbers that are in the string.

    A number starts at a digit and continues over digits, periods and degree
    signs (U+00B0).  Periods are only kept when another character of the
    number follows them, so a sentence-ending period is not included.
    ``str1`` must be a text (unicode) string.
    """
    # ``unicode`` only exists on Python 2; on Python 3 the text type is str.
    try:
        text_type = unicode  # noqa: F821
    except NameError:
        text_type = str
    assert isinstance(str1, text_type)

    numbers = []
    innumber = False
    degreesign = u'\xb0'
    lastnumber = ""
    # Periods seen inside a number are held back until we know that more of
    # the number follows them.
    carryperiod = ""
    for chr1 in str1:
        if chr1.isdigit():
            innumber = True
        elif innumber and chr1 != '.' and chr1 != degreesign:
            # Any other character ends the number being collected.
            innumber = False
            if lastnumber:
                numbers.append(lastnumber)
            lastnumber = ""

        if not innumber:
            carryperiod = ""
        elif chr1 == degreesign:
            lastnumber += chr1
        elif chr1 == '.':
            carryperiod += chr1
        else:
            lastnumber += carryperiod + chr1
            carryperiod = ""
    # Flush a number that runs up to the end of the string.
    if innumber and lastnumber:
        numbers.append(lastnumber)
    return numbers
248
249
_function_re = re.compile(r'''((?:
    [\w\.]+              # function or module name - any alpha-numeric character, _, or .
    (?:(?:::|->|\.)\w+)* # (optional) C++ style Class::Method() syntax or pointer->Method() or module.function()
    \(\)                 # Must close with ()
)+)
''', re.VERBOSE)


def getfunctions(str1):
    """Return the functions() that are in a string.

    A function is a name (optionally qualified with ``.``, ``::`` or ``->``)
    immediately followed by ``()``.  Directly adjacent calls such as
    ``a()b()`` are returned together as a single match.
    """
    # Cheap substring test first: without "()" the regex cannot match.
    if u"()" not in str1:
        return []
    return _function_re.findall(str1)
265
266
def getemails(str1):
    """Return the email addresses that are in a string.

    An address is any run of word characters, dots and hyphens on both sides
    of an ``@``; trailing dots or hyphens directly after the address are
    therefore included in the match.
    """
    # Raw string so the regex escapes are not read as string escapes.
    return re.findall(r'[\w\.\-]+@[\w\.\-]+', str1)
270
271
def geturls(str1):
    """Return the URIs in a string.

    Matches text starting with ``http:``, ``https:``, ``www.`` or ``ftp:``
    and continuing over the characters allowed by the pattern below.  Commas,
    semicolons and (for http/www) parentheses are among those characters, so
    such punctuation directly after a URI becomes part of the match.
    """
    # Raw strings so the regex escapes are not read as string escapes.
    URLPAT = (r'https?:[\w/\.:;+\-~\%#\$?=&,()]+|www\.[\w/\.:;+\-~\%#\$?=&,()]+|'
              r'ftp:[\w/\.:;+\-~\%#?=&,]+')
    return re.findall(URLPAT, str1)
277
278
def countaccelerators(accelmarker, acceptlist=None):
    """Return a function that counts the number of accelerators marked with the given marker.

    The returned function takes a string and returns a pair
    ``(valid_count, invalid_count)`` based on findaccelerators() called with
    the given ``accelmarker`` and ``acceptlist``.
    """

    def countmarkedaccelerators(str1):
        """Return the counts of valid and invalid accelerators marked in str1."""
        acclocs, badlocs = findaccelerators(str1, accelmarker, acceptlist)
        return len(acclocs), len(badlocs)

    return countmarkedaccelerators
287