1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 package org.mortbay.xml;
16
17 import java.io.File;
18 import java.io.IOException;
19 import java.io.InputStream;
20 import java.net.URL;
21 import java.util.AbstractList;
22 import java.util.ArrayList;
23 import java.util.HashMap;
24 import java.util.Iterator;
25 import java.util.Map;
26 import java.util.NoSuchElementException;
27 import java.util.Stack;
28 import java.util.StringTokenizer;
29
30 import javax.xml.parsers.SAXParser;
31 import javax.xml.parsers.SAXParserFactory;
32
33 import org.mortbay.log.Log;
34 import org.mortbay.util.LazyList;
35 import org.xml.sax.Attributes;
36 import org.xml.sax.ContentHandler;
37 import org.xml.sax.InputSource;
38 import org.xml.sax.SAXException;
39 import org.xml.sax.SAXParseException;
40 import org.xml.sax.XMLReader;
41 import org.xml.sax.helpers.DefaultHandler;
42
43
44
45
46
47
48
49
50
51
52
53 public class XmlParser
54 {
/** Entity redirections: looked up by public id, system id or bare DTD file name, yielding a URL. */
private Map _redirectMap = new HashMap();
/** The underlying SAX parser; (re)created by setValidating. */
private SAXParser _parser;
/** Element name to observing ContentHandler; created lazily by addContentHandler. */
private Map _observerMap;
/** One entry (possibly null) per element currently open in the parse handler. */
private Stack _observers = new Stack();
/** The xpath expression exactly as last passed to setXpath. */
private String _xpath;
/** LazyList of the individual paths tokenised from the xpath expression; null means no filter. */
private Object _xpaths;
/** System id of the last resolved entity whose system id ended in ".dtd"; reset by the parse methods that run the parser. */
private String _dtd;
62
63
64
65
66
67 public XmlParser()
68 {
69 SAXParserFactory factory = SAXParserFactory.newInstance();
70 boolean validating_dft = factory.getClass().toString().startsWith("org.apache.xerces.");
71 String validating_prop = System.getProperty("org.mortbay.xml.XmlParser.Validating", validating_dft ? "true" : "false");
72 boolean notValidating = Boolean.getBoolean("org.mortbay.xml.XmlParser.NotValidating");
73 boolean validating = !notValidating && Boolean.valueOf(validating_prop).booleanValue();
74 setValidating(validating);
75 }
76
77
78
79
80
/**
 * Constructs a parser with an explicitly chosen validation mode.
 *
 * @param validating <code>true</code> to create a validating parser
 */
public XmlParser(boolean validating)
{
    this.setValidating(validating);
}
85
86
87 public void setValidating(boolean validating)
88 {
89 try
90 {
91 SAXParserFactory factory = SAXParserFactory.newInstance();
92 factory.setValidating(validating);
93 _parser = factory.newSAXParser();
94
95 try
96 {
97 if (validating)
98 _parser.getXMLReader().setFeature("http://apache.org/xml/features/validation/schema", validating);
99 }
100 catch (Exception e)
101 {
102 if (validating)
103 Log.warn("Schema validation may not be supported: ", e);
104 else
105 Log.ignore(e);
106 }
107
108 _parser.getXMLReader().setFeature("http://xml.org/sax/features/validation", validating);
109 _parser.getXMLReader().setFeature("http://xml.org/sax/features/namespaces", true);
110 _parser.getXMLReader().setFeature("http://xml.org/sax/features/namespace-prefixes", false);
111 }
112 catch (Exception e)
113 {
114 Log.warn(Log.EXCEPTION, e);
115 throw new Error(e.toString());
116 }
117 }
118
119
120
121
122
123
124 public synchronized void redirectEntity(String name, URL entity)
125 {
126 if (entity != null)
127 _redirectMap.put(name, entity);
128 }
129
130
131
132
133
134
135 public String getXpath()
136 {
137 return _xpath;
138 }
139
140
141
142
143
144
145
146
147 public void setXpath(String xpath)
148 {
149 _xpath = xpath;
150 StringTokenizer tok = new StringTokenizer(xpath, "| ");
151 while (tok.hasMoreTokens())
152 _xpaths = LazyList.add(_xpaths, tok.nextToken());
153 }
154
155
156 public String getDTD()
157 {
158 return _dtd;
159 }
160
161
162
163
164
165
166
167
168
169
170 public synchronized void addContentHandler(String trigger, ContentHandler observer)
171 {
172 if (_observerMap == null)
173 _observerMap = new HashMap();
174 _observerMap.put(trigger, observer);
175 }
176
177
178 public synchronized Node parse(InputSource source) throws IOException, SAXException
179 {
180 _dtd=null;
181 Handler handler = new Handler();
182 XMLReader reader = _parser.getXMLReader();
183 reader.setContentHandler(handler);
184 reader.setErrorHandler(handler);
185 reader.setEntityResolver(handler);
186 if (Log.isDebugEnabled())
187 Log.debug("parsing: sid=" + source.getSystemId() + ",pid=" + source.getPublicId());
188 _parser.parse(source, handler);
189 if (handler._error != null)
190 throw handler._error;
191 Node doc = (Node) handler._top.get(0);
192 handler.clear();
193 return doc;
194 }
195
196
197
198
199
200 public synchronized Node parse(String url) throws IOException, SAXException
201 {
202 if (Log.isDebugEnabled())
203 Log.debug("parse: " + url);
204 return parse(new InputSource(url));
205 }
206
207
208
209
210
211 public synchronized Node parse(File file) throws IOException, SAXException
212 {
213 if (Log.isDebugEnabled())
214 Log.debug("parse: " + file);
215 return parse(new InputSource(file.toURL().toString()));
216 }
217
218
219
220
221
222 public synchronized Node parse(InputStream in) throws IOException, SAXException
223 {
224 _dtd=null;
225 Handler handler = new Handler();
226 XMLReader reader = _parser.getXMLReader();
227 reader.setContentHandler(handler);
228 reader.setErrorHandler(handler);
229 reader.setEntityResolver(handler);
230 _parser.parse(new InputSource(in), handler);
231 if (handler._error != null)
232 throw handler._error;
233 Node doc = (Node) handler._top.get(0);
234 handler.clear();
235 return doc;
236 }
237
238
239
240 private class NoopHandler extends DefaultHandler
241 {
242 Handler _next;
243 int _depth;
244
245 NoopHandler(Handler next)
246 {
247 this._next = next;
248 }
249
250
251 public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException
252 {
253 _depth++;
254 }
255
256
257 public void endElement(String uri, String localName, String qName) throws SAXException
258 {
259 if (_depth == 0)
260 _parser.getXMLReader().setContentHandler(_next);
261 else
262 _depth--;
263 }
264 }
265
266
267
268 private class Handler extends DefaultHandler
269 {
270 Node _top = new Node(null, null, null);
271 SAXParseException _error;
272 private Node _context = _top;
273 private NoopHandler _noop;
274
275 Handler()
276 {
277 _noop = new NoopHandler(this);
278 }
279
280
281 void clear()
282 {
283 _top = null;
284 _error = null;
285 _context = null;
286 }
287
288
289 public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException
290 {
291 String name = (uri == null || uri.equals("")) ? qName : localName;
292 Node node = new Node(_context, name, attrs);
293
294
295
296 if (_xpaths != null)
297 {
298 String path = node.getPath();
299 boolean match = false;
300 for (int i = LazyList.size(_xpaths); !match && i-- > 0;)
301 {
302 String xpath = (String) LazyList.get(_xpaths, i);
303
304 match = path.equals(xpath) || xpath.startsWith(path) && xpath.length() > path.length() && xpath.charAt(path.length()) == '/';
305 }
306
307 if (match)
308 {
309 _context.add(node);
310 _context = node;
311 }
312 else
313 {
314 _parser.getXMLReader().setContentHandler(_noop);
315 }
316 }
317 else
318 {
319 _context.add(node);
320 _context = node;
321 }
322
323 ContentHandler observer = null;
324 if (_observerMap != null)
325 observer = (ContentHandler) _observerMap.get(name);
326 _observers.push(observer);
327
328 for (int i = 0; i < _observers.size(); i++)
329 if (_observers.get(i) != null)
330 ((ContentHandler) _observers.get(i)).startElement(uri, localName, qName, attrs);
331 }
332
333
334 public void endElement(String uri, String localName, String qName) throws SAXException
335 {
336 _context = _context._parent;
337 for (int i = 0; i < _observers.size(); i++)
338 if (_observers.get(i) != null)
339 ((ContentHandler) _observers.get(i)).endElement(uri, localName, qName);
340 _observers.pop();
341 }
342
343
344 public void ignorableWhitespace(char buf[], int offset, int len) throws SAXException
345 {
346 for (int i = 0; i < _observers.size(); i++)
347 if (_observers.get(i) != null)
348 ((ContentHandler) _observers.get(i)).ignorableWhitespace(buf, offset, len);
349 }
350
351
352 public void characters(char buf[], int offset, int len) throws SAXException
353 {
354 _context.add(new String(buf, offset, len));
355 for (int i = 0; i < _observers.size(); i++)
356 if (_observers.get(i) != null)
357 ((ContentHandler) _observers.get(i)).characters(buf, offset, len);
358 }
359
360
361 public void warning(SAXParseException ex)
362 {
363 Log.debug(Log.EXCEPTION, ex);
364 Log.warn("WARNING@" + getLocationString(ex) + " : " + ex.toString());
365 }
366
367
368 public void error(SAXParseException ex) throws SAXException
369 {
370
371 if (_error == null)
372 _error = ex;
373 Log.debug(Log.EXCEPTION, ex);
374 Log.warn("ERROR@" + getLocationString(ex) + " : " + ex.toString());
375 }
376
377
378 public void fatalError(SAXParseException ex) throws SAXException
379 {
380 _error = ex;
381 Log.debug(Log.EXCEPTION, ex);
382 Log.warn("FATAL@" + getLocationString(ex) + " : " + ex.toString());
383 throw ex;
384 }
385
386
387 private String getLocationString(SAXParseException ex)
388 {
389 return ex.getSystemId() + " line:" + ex.getLineNumber() + " col:" + ex.getColumnNumber();
390 }
391
392
393 public InputSource resolveEntity(String pid, String sid)
394 {
395 if (Log.isDebugEnabled())
396 Log.debug("resolveEntity(" + pid + ", " + sid + ")");
397
398 if (sid!=null && sid.endsWith(".dtd"))
399 _dtd=sid;
400
401 URL entity = null;
402 if (pid != null)
403 entity = (URL) _redirectMap.get(pid);
404 if (entity == null)
405 entity = (URL) _redirectMap.get(sid);
406 if (entity == null)
407 {
408 String dtd = sid;
409 if (dtd.lastIndexOf('/') >= 0)
410 dtd = dtd.substring(dtd.lastIndexOf('/') + 1);
411
412 if (Log.isDebugEnabled())
413 Log.debug("Can't exact match entity in redirect map, trying " + dtd);
414 entity = (URL) _redirectMap.get(dtd);
415 }
416
417 if (entity != null)
418 {
419 try
420 {
421 InputStream in = entity.openStream();
422 if (Log.isDebugEnabled())
423 Log.debug("Redirected entity " + sid + " --> " + entity);
424 InputSource is = new InputSource(in);
425 is.setSystemId(sid);
426 return is;
427 }
428 catch (IOException e)
429 {
430 Log.ignore(e);
431 }
432 }
433 return null;
434 }
435 }
436
437
438
439
440
441
442 public static class Attribute
443 {
444 private String _name;
445 private String _value;
446
447 Attribute(String n, String v)
448 {
449 _name = n;
450 _value = v;
451 }
452
453 public String getName()
454 {
455 return _name;
456 }
457
458 public String getValue()
459 {
460 return _value;
461 }
462 }
463
464
465
466
467
468
469 public static class Node extends AbstractList
470 {
471 Node _parent;
472 private ArrayList _list;
473 private String _tag;
474 private Attribute[] _attrs;
475 private boolean _lastString = false;
476 private String _path;
477
478
479 Node(Node parent, String tag, Attributes attrs)
480 {
481 _parent = parent;
482 _tag = tag;
483
484 if (attrs != null)
485 {
486 _attrs = new Attribute[attrs.getLength()];
487 for (int i = 0; i < attrs.getLength(); i++)
488 {
489 String name = attrs.getLocalName(i);
490 if (name == null || name.equals(""))
491 name = attrs.getQName(i);
492 _attrs[i] = new Attribute(name, attrs.getValue(i));
493 }
494 }
495 }
496
497
498 public Node getParent()
499 {
500 return _parent;
501 }
502
503
504 public String getTag()
505 {
506 return _tag;
507 }
508
509
510 public String getPath()
511 {
512 if (_path == null)
513 {
514 if (getParent() != null && getParent().getTag() != null)
515 _path = getParent().getPath() + "/" + _tag;
516 else
517 _path = "/" + _tag;
518 }
519 return _path;
520 }
521
522
523
524
525
526 public Attribute[] getAttributes()
527 {
528 return _attrs;
529 }
530
531
532
533
534
535
536
537 public String getAttribute(String name)
538 {
539 return getAttribute(name, null);
540 }
541
542
543
544
545
546
547
548 public String getAttribute(String name, String dft)
549 {
550 if (_attrs == null || name == null)
551 return dft;
552 for (int i = 0; i < _attrs.length; i++)
553 if (name.equals(_attrs[i].getName()))
554 return _attrs[i].getValue();
555 return dft;
556 }
557
558
559
560
561
562 public int size()
563 {
564 if (_list != null)
565 return _list.size();
566 return 0;
567 }
568
569
570
571
572
573
574
575 public Object get(int i)
576 {
577 if (_list != null)
578 return _list.get(i);
579 return null;
580 }
581
582
583
584
585
586
587
588
589 public Node get(String tag)
590 {
591 if (_list != null)
592 {
593 for (int i = 0; i < _list.size(); i++)
594 {
595 Object o = _list.get(i);
596 if (o instanceof Node)
597 {
598 Node n = (Node) o;
599 if (tag.equals(n._tag))
600 return n;
601 }
602 }
603 }
604 return null;
605 }
606
607
608 public void add(int i, Object o)
609 {
610 if (_list == null)
611 _list = new ArrayList();
612 if (o instanceof String)
613 {
614 if (_lastString)
615 {
616 int last = _list.size() - 1;
617 _list.set(last, (String) _list.get(last) + o);
618 }
619 else
620 _list.add(i, o);
621 _lastString = true;
622 }
623 else
624 {
625 _lastString = false;
626 _list.add(i, o);
627 }
628 }
629
630
631 public void clear()
632 {
633 if (_list != null)
634 _list.clear();
635 _list = null;
636 }
637
638
639
640
641
642
643
644
645
646
647 public String getString(String tag, boolean tags, boolean trim)
648 {
649 Node node = get(tag);
650 if (node == null)
651 return null;
652 String s = node.toString(tags);
653 if (s != null && trim)
654 s = s.trim();
655 return s;
656 }
657
658
659 public synchronized String toString()
660 {
661 return toString(true);
662 }
663
664
665
666
667
668
669
670 public synchronized String toString(boolean tag)
671 {
672 StringBuffer buf = new StringBuffer();
673 synchronized (buf)
674 {
675 toString(buf, tag);
676 return buf.toString();
677 }
678 }
679
680
681
682
683
684
685
686 public synchronized String toString(boolean tag, boolean trim)
687 {
688 String s = toString(tag);
689 if (s != null && trim)
690 s = s.trim();
691 return s;
692 }
693
694
695 private synchronized void toString(StringBuffer buf, boolean tag)
696 {
697 if (tag)
698 {
699 buf.append("<");
700 buf.append(_tag);
701
702 if (_attrs != null)
703 {
704 for (int i = 0; i < _attrs.length; i++)
705 {
706 buf.append(' ');
707 buf.append(_attrs[i].getName());
708 buf.append("=\"");
709 buf.append(_attrs[i].getValue());
710 buf.append("\"");
711 }
712 }
713 }
714
715 if (_list != null)
716 {
717 if (tag)
718 buf.append(">");
719 for (int i = 0; i < _list.size(); i++)
720 {
721 Object o = _list.get(i);
722 if (o == null)
723 continue;
724 if (o instanceof Node)
725 ((Node) o).toString(buf, tag);
726 else
727 buf.append(o.toString());
728 }
729 if (tag)
730 {
731 buf.append("</");
732 buf.append(_tag);
733 buf.append(">");
734 }
735 }
736 else if (tag)
737 buf.append("/>");
738 }
739
740
741
742
743
744
745
746
747 public Iterator iterator(final String tag)
748 {
749 return new Iterator()
750 {
751 int c = 0;
752 Node _node;
753
754
755 public boolean hasNext()
756 {
757 if (_node != null)
758 return true;
759 while (_list != null && c < _list.size())
760 {
761 Object o = _list.get(c);
762 if (o instanceof Node)
763 {
764 Node n = (Node) o;
765 if (tag.equals(n._tag))
766 {
767 _node = n;
768 return true;
769 }
770 }
771 c++;
772 }
773 return false;
774 }
775
776
777 public Object next()
778 {
779 try
780 {
781 if (hasNext())
782 return _node;
783 throw new NoSuchElementException();
784 }
785 finally
786 {
787 _node = null;
788 c++;
789 }
790 }
791
792
793 public void remove()
794 {
795 throw new UnsupportedOperationException("Not supported");
796 }
797 };
798 }
799 }
800 }