1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package groovy.util;
19
20 import groovy.util.slurpersupport.GPathResult;
21 import groovy.util.slurpersupport.Node;
22 import groovy.util.slurpersupport.NodeChild;
23
24 import java.io.File;
25 import java.io.FileInputStream;
26 import java.io.IOException;
27 import java.io.InputStream;
28 import java.io.Reader;
29 import java.io.StringReader;
30 import java.net.URL;
31 import java.security.AccessController;
32 import java.security.PrivilegedActionException;
33 import java.security.PrivilegedExceptionAction;
34 import java.util.HashMap;
35 import java.util.Hashtable;
36 import java.util.Map;
37 import java.util.Stack;
38
39 import javax.xml.parsers.ParserConfigurationException;
40 import javax.xml.parsers.SAXParser;
41 import javax.xml.parsers.SAXParserFactory;
42
43 import org.xml.sax.Attributes;
44 import org.xml.sax.DTDHandler;
45 import org.xml.sax.EntityResolver;
46 import org.xml.sax.ErrorHandler;
47 import org.xml.sax.InputSource;
48 import org.xml.sax.SAXException;
49 import org.xml.sax.SAXNotRecognizedException;
50 import org.xml.sax.SAXNotSupportedException;
51 import org.xml.sax.XMLReader;
52 import org.xml.sax.helpers.DefaultHandler;
53
54 /***
55 * @author John Wilson
56 *
57 */
58
59 public class XmlSlurper extends DefaultHandler {
60 private final XMLReader reader;
61 private Node currentNode = null;
62 private final Stack stack = new Stack();
63 private final StringBuffer charBuffer = new StringBuffer();
64 private final Map namespaceTagHints = new Hashtable();
65
66 public XmlSlurper() throws ParserConfigurationException, SAXException {
67 this(false, true);
68 }
69
70 public XmlSlurper(final boolean validating, final boolean namespaceAware) throws ParserConfigurationException, SAXException {
71 SAXParserFactory factory = null;
72
73 try {
74 factory = (SAXParserFactory) AccessController.doPrivileged(new PrivilegedExceptionAction() {
75 public Object run() throws ParserConfigurationException {
76 return SAXParserFactory.newInstance();
77 }
78 });
79 } catch (final PrivilegedActionException pae) {
80 final Exception e = pae.getException();
81
82 if (e instanceof ParserConfigurationException) {
83 throw (ParserConfigurationException) e;
84 } else {
85 throw new RuntimeException(e);
86 }
87 }
88 factory.setNamespaceAware(namespaceAware);
89 factory.setValidating(validating);
90
91 final SAXParser parser = factory.newSAXParser();
92 this.reader = parser.getXMLReader();
93 }
94
95 public XmlSlurper(final XMLReader reader) {
96 this.reader = reader;
97 }
98
99 public XmlSlurper(final SAXParser parser) throws SAXException {
100 this(parser.getXMLReader());
101 }
102
103 /***
104 * @return The GPathResult instance created by consuming a stream of SAX events
105 * Note if one of the parse methods has been called then this returns null
106 * Note if this is called more than once all calls after the first will return null
107 *
108 */
109 public GPathResult getDocument() {
110 try {
111 return new NodeChild(this.currentNode, null, this.namespaceTagHints);
112 } finally {
113 this.currentNode = null;
114 }
115 }
116
117 /***
118 * Parse the content of the specified input source into a GPathResult object
119 *
120 * @param input
121 * @return An object which supports GPath expressions
122 * @throws IOException
123 * @throws SAXException
124 */
125 public GPathResult parse(final InputSource input) throws IOException, SAXException {
126 this.reader.setContentHandler(this);
127 this.reader.parse(input);
128
129 return getDocument();
130
131 }
132
133 /***
134 * Parses the content of the given file as XML turning it into a GPathResult object
135 *
136 * @param file
137 * @return An object which supports GPath expressions
138 * @throws IOException
139 * @throws SAXException
140 */
141 public GPathResult parse(final File file) throws IOException, SAXException {
142 final InputSource input = new InputSource(new FileInputStream(file));
143
144 input.setSystemId("file://" + file.getAbsolutePath());
145
146 return parse(input);
147
148 }
149
150 /***
151 * Parse the content of the specified input stream into an GPathResult Object.
152 * Note that using this method will not provide the parser with any URI
153 * for which to find DTDs etc
154 *
155 * @param input
156 * @return An object which supports GPath expressions
157 * @throws IOException
158 * @throws SAXException
159 */
160 public GPathResult parse(final InputStream input) throws IOException, SAXException {
161 return parse(new InputSource(input));
162 }
163
164 /***
165 * Parse the content of the specified reader into a GPathResult Object.
166 * Note that using this method will not provide the parser with any URI
167 * for which to find DTDs etc
168 *
169 * @param in
170 * @return An object which supports GPath expressions
171 * @throws IOException
172 * @throws SAXException
173 */
174 public GPathResult parse(final Reader in) throws IOException, SAXException {
175 return parse(new InputSource(in));
176 }
177
178 /***
179 * Parse the content of the specified URI into a GPathResult Object
180 *
181 * @param uri
182 * @return An object which supports GPath expressions
183 * @throws IOException
184 * @throws SAXException
185 */
186 public GPathResult parse(final String uri) throws IOException, SAXException {
187 return parse(new InputSource(uri));
188 }
189
190 /***
191 * A helper method to parse the given text as XML
192 *
193 * @param text
194 * @return An object which supports GPath expressions
195 */
196 public GPathResult parseText(final String text) throws IOException, SAXException {
197 return parse(new StringReader(text));
198 }
199
200
201
202
203
204
205
206 public DTDHandler getDTDHandler() {
207 return this.reader.getDTDHandler();
208 }
209
210
211
212
213 public EntityResolver getEntityResolver() {
214 return this.reader.getEntityResolver();
215 }
216
217
218
219
220 public ErrorHandler getErrorHandler() {
221 return this.reader.getErrorHandler();
222 }
223
224
225
226
227 public boolean getFeature(final String uri) throws SAXNotRecognizedException, SAXNotSupportedException {
228 return this.reader.getFeature(uri);
229 }
230
231
232
233
234 public Object getProperty(final String uri) throws SAXNotRecognizedException, SAXNotSupportedException {
235 return this.reader.getProperty(uri);
236 }
237
238
239
240
241 public void setDTDHandler(final DTDHandler dtdHandler) {
242 this.reader.setDTDHandler(dtdHandler);
243 }
244
245
246
247
248 public void setEntityResolver(final EntityResolver entityResolver) {
249 this.reader.setEntityResolver(entityResolver);
250 }
251
252 /***
253 * Resolves entities against using the suppied URL as the base for relative URLs
254 *
255 * @param base
256 * The URL used to resolve relative URLs
257 */
258 public void setEntityBaseUrl(final URL base) {
259 this.reader.setEntityResolver(new EntityResolver() {
260 public InputSource resolveEntity(final String publicId, final String systemId) throws IOException {
261 return new InputSource(new URL(base, systemId).openStream());
262 }
263 });
264 }
265
266
267
268
269 public void setErrorHandler(final ErrorHandler errorHandler) {
270 this.reader.setErrorHandler(errorHandler);
271 }
272
273
274
275
276 public void setFeature(final String uri, final boolean value) throws SAXNotRecognizedException, SAXNotSupportedException {
277 this.reader.setFeature(uri, value);
278 }
279
280
281
282
283 public void setProperty(final String uri, final Object value) throws SAXNotRecognizedException, SAXNotSupportedException {
284 this.reader.setProperty(uri, value);
285 }
286
287
288
289
290
291
292
293
294 public void startDocument() throws SAXException {
295 this.currentNode = null;
296 this.charBuffer.setLength(0);
297 }
298
299
300
301
302 public void startPrefixMapping(final String tag, final String uri) throws SAXException {
303 this.namespaceTagHints.put(tag, uri);
304 }
305
306
307
308
309 public void startElement(final String namespaceURI, final String localName, final String qName, final Attributes atts) throws SAXException {
310 addNonWhitespaceCdata();
311
312 final Map attributes = new HashMap();
313 final Map attributeNamespaces = new HashMap();
314
315 for (int i = atts.getLength() - 1; i != -1; i--) {
316 if (atts.getURI(i).length() == 0) {
317 attributes.put(atts.getQName(i), atts.getValue(i));
318 } else {
319 attributes.put(atts.getLocalName(i), atts.getValue(i));
320 attributeNamespaces.put(atts.getLocalName(i), atts.getURI(i));
321 }
322
323 }
324
325 final Node newElement;
326
327 if (namespaceURI.length() == 0){
328 newElement = new Node(this.currentNode, qName, attributes, attributeNamespaces, namespaceURI);
329 } else {
330 newElement = new Node(this.currentNode, localName, attributes, attributeNamespaces, namespaceURI);
331 }
332
333 if (this.currentNode != null) {
334 this.currentNode.addChild(newElement);
335 }
336
337 this.stack.push(this.currentNode);
338 this.currentNode = newElement;
339 }
340
341
342
343
344 public void characters(final char[] ch, final int start, final int length) throws SAXException {
345 this.charBuffer.append(ch, start, length);
346 }
347
348
349
350
351 public void endElement(final String namespaceURI, final String localName, final String qName) throws SAXException {
352 addNonWhitespaceCdata();
353
354 final Object oldCurrentNode = this.stack.pop();
355
356 if (oldCurrentNode != null) {
357 this.currentNode = (Node)oldCurrentNode;
358 }
359 }
360
361
362
363
364 public void endDocument() throws SAXException {
365 }
366
367
368
369
370 /***
371 *
372 */
373 private void addNonWhitespaceCdata() {
374 if (this.charBuffer.length() != 0) {
375
376
377
378
379
380 final String cdata = this.charBuffer.toString();
381
382 this.charBuffer.setLength(0);
383 if (cdata.trim().length() != 0) {
384 this.currentNode.addChild(cdata);
385 }
386 }
387 }
388 }