View Javadoc

1   // ========================================================================
2   // Copyright 2004-2005 Mort Bay Consulting Pty. Ltd.
3   // ------------------------------------------------------------------------
4   // Licensed under the Apache License, Version 2.0 (the "License");
5   // you may not use this file except in compliance with the License.
6   // You may obtain a copy of the License at 
7   // http://www.apache.org/licenses/LICENSE-2.0
8   // Unless required by applicable law or agreed to in writing, software
9   // distributed under the License is distributed on an "AS IS" BASIS,
10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11  // See the License for the specific language governing permissions and
12  // limitations under the License.
13  // ========================================================================
14  
15  package org.mortbay.xml;
16  
17  import java.io.File;
18  import java.io.IOException;
19  import java.io.InputStream;
20  import java.net.URL;
21  import java.util.AbstractList;
22  import java.util.ArrayList;
23  import java.util.HashMap;
24  import java.util.Iterator;
25  import java.util.Map;
26  import java.util.NoSuchElementException;
27  import java.util.Stack;
28  import java.util.StringTokenizer;
29  
30  import javax.xml.parsers.SAXParser;
31  import javax.xml.parsers.SAXParserFactory;
32  
33  import org.mortbay.log.Log;
34  import org.mortbay.util.LazyList;
35  import org.xml.sax.Attributes;
36  import org.xml.sax.ContentHandler;
37  import org.xml.sax.InputSource;
38  import org.xml.sax.SAXException;
39  import org.xml.sax.SAXParseException;
40  import org.xml.sax.XMLReader;
41  import org.xml.sax.helpers.DefaultHandler;
42  
43  /*--------------------------------------------------------------*/
44  /**
45   * XML Parser wrapper. This class wraps any standard JAXP1.1 parser with convenient error and
46   * entity handlers and a mini dom-like document tree.
47   * <P>
48   * By default, the parser is created as a validating parser only if xerces is present. This can be 
49   * configured by setting the "org.mortbay.xml.XmlParser.Validating" system property.
50   * 
51   * @author Greg Wilkins (gregw)
52   */
53  public class XmlParser
54  {
55      private Map _redirectMap = new HashMap();
56      private SAXParser _parser;
57      private Map _observerMap;
58      private Stack _observers = new Stack();
59      private String _xpath;
60      private Object _xpaths;
61      private String _dtd;
62  
63      /* ------------------------------------------------------------ */
64      /**
65       * Construct
66       */
67      public XmlParser()
68      {
69          SAXParserFactory factory = SAXParserFactory.newInstance();
70          boolean validating_dft = factory.getClass().toString().startsWith("org.apache.xerces.");
71          String validating_prop = System.getProperty("org.mortbay.xml.XmlParser.Validating", validating_dft ? "true" : "false");
72          boolean notValidating = Boolean.getBoolean("org.mortbay.xml.XmlParser.NotValidating"); // deprecated!
73          boolean validating = !notValidating && Boolean.valueOf(validating_prop).booleanValue();
74          setValidating(validating);
75      }
76  
77      /* ------------------------------------------------------------ */
78      /**
79       * Constructor.
80       */
81      public XmlParser(boolean validating)
82      {
83          setValidating(validating);
84      }
85      
86      /* ------------------------------------------------------------ */
87      public void setValidating(boolean validating)
88      {
89          try
90          {
91              SAXParserFactory factory = SAXParserFactory.newInstance();
92              factory.setValidating(validating);
93              _parser = factory.newSAXParser();
94              
95              try
96              {
97                  if (validating)
98                      _parser.getXMLReader().setFeature("http://apache.org/xml/features/validation/schema", validating);
99              }
100             catch (Exception e)
101             {
102                 if (validating)
103                     Log.warn("Schema validation may not be supported: ", e);
104                 else
105                     Log.ignore(e);
106             }
107 
108             _parser.getXMLReader().setFeature("http://xml.org/sax/features/validation", validating);
109             _parser.getXMLReader().setFeature("http://xml.org/sax/features/namespaces", true);
110             _parser.getXMLReader().setFeature("http://xml.org/sax/features/namespace-prefixes", false);  
111         }
112         catch (Exception e)
113         {
114             Log.warn(Log.EXCEPTION, e);
115             throw new Error(e.toString());
116         }
117     }
118     
119     /* ------------------------------------------------------------ */
120     /**
121      * @param name
122      * @param entity
123      */
124     public synchronized void redirectEntity(String name, URL entity)
125     {
126         if (entity != null)
127             _redirectMap.put(name, entity);
128     }
129 
130     /* ------------------------------------------------------------ */
131     /**
132      * 
133      * @return Returns the xpath.
134      */
135     public String getXpath()
136     {
137         return _xpath;
138     }
139 
140     /* ------------------------------------------------------------ */
141     /**
142      * Set an XPath A very simple subset of xpath is supported to select a partial tree. Currently
143      * only path like "/node1/nodeA | /node1/nodeB" are supported.
144      * 
145      * @param xpath The xpath to set.
146      */
147     public void setXpath(String xpath)
148     {
149         _xpath = xpath;
150         StringTokenizer tok = new StringTokenizer(xpath, "| ");
151         while (tok.hasMoreTokens())
152             _xpaths = LazyList.add(_xpaths, tok.nextToken());
153     }
154 
155     /* ------------------------------------------------------------ */
156     public String getDTD()
157     {
158         return _dtd;
159     }
160 
161     /* ------------------------------------------------------------ */
162     /**
163      * Add a ContentHandler. Add an additional _content handler that is triggered on a tag name. SAX
164      * events are passed to the ContentHandler provided from a matching start element to the
165      * corresponding end element. Only a single _content handler can be registered against each tag.
166      * 
167      * @param trigger Tag local or q name.
168      * @param observer SAX ContentHandler
169      */
170     public synchronized void addContentHandler(String trigger, ContentHandler observer)
171     {
172         if (_observerMap == null)
173             _observerMap = new HashMap();
174         _observerMap.put(trigger, observer);
175     }
176 
177     /* ------------------------------------------------------------ */
178     public synchronized Node parse(InputSource source) throws IOException, SAXException
179     {
180         _dtd=null;
181         Handler handler = new Handler();
182         XMLReader reader = _parser.getXMLReader();
183         reader.setContentHandler(handler);
184         reader.setErrorHandler(handler);
185         reader.setEntityResolver(handler);
186         if (Log.isDebugEnabled())
187             Log.debug("parsing: sid=" + source.getSystemId() + ",pid=" + source.getPublicId());
188         _parser.parse(source, handler);
189         if (handler._error != null)
190             throw handler._error;
191         Node doc = (Node) handler._top.get(0);
192         handler.clear();
193         return doc;
194     }
195 
196     /* ------------------------------------------------------------ */
197     /**
198      * Parse String URL.
199      */
200     public synchronized Node parse(String url) throws IOException, SAXException
201     {
202         if (Log.isDebugEnabled())
203             Log.debug("parse: " + url);
204         return parse(new InputSource(url));
205     }
206 
207     /* ------------------------------------------------------------ */
208     /**
209      * Parse File.
210      */
211     public synchronized Node parse(File file) throws IOException, SAXException
212     {
213         if (Log.isDebugEnabled())
214             Log.debug("parse: " + file);
215         return parse(new InputSource(file.toURL().toString()));
216     }
217 
218     /* ------------------------------------------------------------ */
219     /**
220      * Parse InputStream.
221      */
222     public synchronized Node parse(InputStream in) throws IOException, SAXException
223     {
224         _dtd=null;
225         Handler handler = new Handler();
226         XMLReader reader = _parser.getXMLReader();
227         reader.setContentHandler(handler);
228         reader.setErrorHandler(handler);
229         reader.setEntityResolver(handler);
230         _parser.parse(new InputSource(in), handler);
231         if (handler._error != null)
232             throw handler._error;
233         Node doc = (Node) handler._top.get(0);
234         handler.clear();
235         return doc;
236     }
237 
238     /* ------------------------------------------------------------ */
239     /* ------------------------------------------------------------ */
240     private class NoopHandler extends DefaultHandler
241     {
242         Handler _next;
243         int _depth;
244 
245         NoopHandler(Handler next)
246         {
247             this._next = next;
248         }
249 
250         /* ------------------------------------------------------------ */
251         public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException
252         {
253             _depth++;
254         }
255 
256         /* ------------------------------------------------------------ */
257         public void endElement(String uri, String localName, String qName) throws SAXException
258         {
259             if (_depth == 0)
260                 _parser.getXMLReader().setContentHandler(_next);
261             else
262                 _depth--;
263         }
264     }
265     
266     /* ------------------------------------------------------------ */
267     /* ------------------------------------------------------------ */
268     private class Handler extends DefaultHandler
269     {
270         Node _top = new Node(null, null, null);
271         SAXParseException _error;
272         private Node _context = _top;
273         private NoopHandler _noop;
274 
275         Handler()
276         {
277             _noop = new NoopHandler(this);
278         }
279 
280         /* ------------------------------------------------------------ */
281         void clear()
282         {
283             _top = null;
284             _error = null;
285             _context = null;
286         }
287 
288         /* ------------------------------------------------------------ */
289         public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException
290         {
291             String name = (uri == null || uri.equals("")) ? qName : localName;
292             Node node = new Node(_context, name, attrs);
293             
294 
295             // check if the node matches any xpaths set?
296             if (_xpaths != null)
297             {
298                 String path = node.getPath();
299                 boolean match = false;
300                 for (int i = LazyList.size(_xpaths); !match && i-- > 0;)
301                 {
302                     String xpath = (String) LazyList.get(_xpaths, i);
303 
304                     match = path.equals(xpath) || xpath.startsWith(path) && xpath.length() > path.length() && xpath.charAt(path.length()) == '/';
305                 }
306 
307                 if (match)
308                 {
309                     _context.add(node);
310                     _context = node;
311                 }
312                 else
313                 {
314                     _parser.getXMLReader().setContentHandler(_noop);
315                 }
316             }
317             else
318             {
319                 _context.add(node);
320                 _context = node;
321             }
322 
323             ContentHandler observer = null;
324             if (_observerMap != null)
325                 observer = (ContentHandler) _observerMap.get(name);
326             _observers.push(observer);
327 
328             for (int i = 0; i < _observers.size(); i++)
329                 if (_observers.get(i) != null)
330                     ((ContentHandler) _observers.get(i)).startElement(uri, localName, qName, attrs);
331         }
332 
333         /* ------------------------------------------------------------ */
334         public void endElement(String uri, String localName, String qName) throws SAXException
335         {
336             _context = _context._parent;
337             for (int i = 0; i < _observers.size(); i++)
338                 if (_observers.get(i) != null)
339                     ((ContentHandler) _observers.get(i)).endElement(uri, localName, qName);
340             _observers.pop();
341         }
342 
343         /* ------------------------------------------------------------ */
344         public void ignorableWhitespace(char buf[], int offset, int len) throws SAXException
345         {
346             for (int i = 0; i < _observers.size(); i++)
347                 if (_observers.get(i) != null)
348                     ((ContentHandler) _observers.get(i)).ignorableWhitespace(buf, offset, len);
349         }
350 
351         /* ------------------------------------------------------------ */
352         public void characters(char buf[], int offset, int len) throws SAXException
353         {
354             _context.add(new String(buf, offset, len));
355             for (int i = 0; i < _observers.size(); i++)
356                 if (_observers.get(i) != null)
357                     ((ContentHandler) _observers.get(i)).characters(buf, offset, len);
358         }
359 
360         /* ------------------------------------------------------------ */
361         public void warning(SAXParseException ex)
362         {
363             Log.debug(Log.EXCEPTION, ex);
364             Log.warn("WARNING@" + getLocationString(ex) + " : " + ex.toString());
365         }
366 
367         /* ------------------------------------------------------------ */
368         public void error(SAXParseException ex) throws SAXException
369         {
370             // Save error and continue to report other errors
371             if (_error == null)
372                 _error = ex;
373             Log.debug(Log.EXCEPTION, ex);
374             Log.warn("ERROR@" + getLocationString(ex) + " : " + ex.toString());
375         }
376 
377         /* ------------------------------------------------------------ */
378         public void fatalError(SAXParseException ex) throws SAXException
379         {
380             _error = ex;
381             Log.debug(Log.EXCEPTION, ex);
382             Log.warn("FATAL@" + getLocationString(ex) + " : " + ex.toString());
383             throw ex;
384         }
385 
386         /* ------------------------------------------------------------ */
387         private String getLocationString(SAXParseException ex)
388         {
389             return ex.getSystemId() + " line:" + ex.getLineNumber() + " col:" + ex.getColumnNumber();
390         }
391 
392         /* ------------------------------------------------------------ */
393         public InputSource resolveEntity(String pid, String sid)
394         {
395             if (Log.isDebugEnabled())
396                 Log.debug("resolveEntity(" + pid + ", " + sid + ")");
397 
398             if (sid!=null && sid.endsWith(".dtd"))
399                 _dtd=sid;
400             
401             URL entity = null;
402             if (pid != null)
403                 entity = (URL) _redirectMap.get(pid);
404             if (entity == null)
405                 entity = (URL) _redirectMap.get(sid);
406             if (entity == null)
407             {
408                 String dtd = sid;
409                 if (dtd.lastIndexOf('/') >= 0)
410                     dtd = dtd.substring(dtd.lastIndexOf('/') + 1);
411 
412                 if (Log.isDebugEnabled())
413                     Log.debug("Can't exact match entity in redirect map, trying " + dtd);
414                 entity = (URL) _redirectMap.get(dtd);
415             }
416 
417             if (entity != null)
418             {
419                 try
420                 {
421                     InputStream in = entity.openStream();
422                     if (Log.isDebugEnabled())
423                         Log.debug("Redirected entity " + sid + " --> " + entity);
424                     InputSource is = new InputSource(in);
425                     is.setSystemId(sid);
426                     return is;
427                 }
428                 catch (IOException e)
429                 {
430                     Log.ignore(e);
431                 }
432             }
433             return null;
434         }
435     }
436 
437     /* ------------------------------------------------------------ */
438     /* ------------------------------------------------------------ */
439     /**
440      * XML Attribute.
441      */
442     public static class Attribute
443     {
444         private String _name;
445         private String _value;
446 
447         Attribute(String n, String v)
448         {
449             _name = n;
450             _value = v;
451         }
452 
453         public String getName()
454         {
455             return _name;
456         }
457 
458         public String getValue()
459         {
460             return _value;
461         }
462     }
463 
464     /* ------------------------------------------------------------ */
465     /* ------------------------------------------------------------ */
466     /**
467      * XML Node. Represents an XML element with optional attributes and ordered content.
468      */
469     public static class Node extends AbstractList
470     {
471         Node _parent;
472         private ArrayList _list;
473         private String _tag;
474         private Attribute[] _attrs;
475         private boolean _lastString = false;
476         private String _path;
477 
478         /* ------------------------------------------------------------ */
479         Node(Node parent, String tag, Attributes attrs)
480         {
481             _parent = parent;
482             _tag = tag;
483 
484             if (attrs != null)
485             {
486                 _attrs = new Attribute[attrs.getLength()];
487                 for (int i = 0; i < attrs.getLength(); i++)
488                 {
489                     String name = attrs.getLocalName(i);
490                     if (name == null || name.equals(""))
491                         name = attrs.getQName(i);
492                     _attrs[i] = new Attribute(name, attrs.getValue(i));
493                 }
494             }
495         }
496 
497         /* ------------------------------------------------------------ */
498         public Node getParent()
499         {
500             return _parent;
501         }
502 
503         /* ------------------------------------------------------------ */
504         public String getTag()
505         {
506             return _tag;
507         }
508 
509         /* ------------------------------------------------------------ */
510         public String getPath()
511         {
512             if (_path == null)
513             {
514                 if (getParent() != null && getParent().getTag() != null)
515                     _path = getParent().getPath() + "/" + _tag;
516                 else
517                     _path = "/" + _tag;
518             }
519             return _path;
520         }
521 
522         /* ------------------------------------------------------------ */
523         /**
524          * Get an array of element attributes.
525          */
526         public Attribute[] getAttributes()
527         {
528             return _attrs;
529         }
530 
531         /* ------------------------------------------------------------ */
532         /**
533          * Get an element attribute.
534          * 
535          * @return attribute or null.
536          */
537         public String getAttribute(String name)
538         {
539             return getAttribute(name, null);
540         }
541 
542         /* ------------------------------------------------------------ */
543         /**
544          * Get an element attribute.
545          * 
546          * @return attribute or null.
547          */
548         public String getAttribute(String name, String dft)
549         {
550             if (_attrs == null || name == null)
551                 return dft;
552             for (int i = 0; i < _attrs.length; i++)
553                 if (name.equals(_attrs[i].getName()))
554                     return _attrs[i].getValue();
555             return dft;
556         }
557 
558         /* ------------------------------------------------------------ */
559         /**
560          * Get the number of children nodes.
561          */
562         public int size()
563         {
564             if (_list != null)
565                 return _list.size();
566             return 0;
567         }
568 
569         /* ------------------------------------------------------------ */
570         /**
571          * Get the ith child node or content.
572          * 
573          * @return Node or String.
574          */
575         public Object get(int i)
576         {
577             if (_list != null)
578                 return _list.get(i);
579             return null;
580         }
581 
582         /* ------------------------------------------------------------ */
583         /**
584          * Get the first child node with the tag.
585          * 
586          * @param tag
587          * @return Node or null.
588          */
589         public Node get(String tag)
590         {
591             if (_list != null)
592             {
593                 for (int i = 0; i < _list.size(); i++)
594                 {
595                     Object o = _list.get(i);
596                     if (o instanceof Node)
597                     {
598                         Node n = (Node) o;
599                         if (tag.equals(n._tag))
600                             return n;
601                     }
602                 }
603             }
604             return null;
605         }
606 
607         /* ------------------------------------------------------------ */
608         public void add(int i, Object o)
609         {
610             if (_list == null)
611                 _list = new ArrayList();
612             if (o instanceof String)
613             {
614                 if (_lastString)
615                 {
616                     int last = _list.size() - 1;
617                     _list.set(last, (String) _list.get(last) + o);
618                 }
619                 else
620                     _list.add(i, o);
621                 _lastString = true;
622             }
623             else
624             {
625                 _lastString = false;
626                 _list.add(i, o);
627             }
628         }
629 
630         /* ------------------------------------------------------------ */
631         public void clear()
632         {
633             if (_list != null)
634                 _list.clear();
635             _list = null;
636         }
637 
638         /* ------------------------------------------------------------ */
639         /**
640          * Get a tag as a string.
641          * 
642          * @param tag The tag to get
643          * @param tags IF true, tags are included in the value.
644          * @param trim If true, trim the value.
645          * @return results of get(tag).toString(tags).
646          */
647         public String getString(String tag, boolean tags, boolean trim)
648         {
649             Node node = get(tag);
650             if (node == null)
651                 return null;
652             String s = node.toString(tags);
653             if (s != null && trim)
654                 s = s.trim();
655             return s;
656         }
657 
658         /* ------------------------------------------------------------ */
659         public synchronized String toString()
660         {
661             return toString(true);
662         }
663 
664         /* ------------------------------------------------------------ */
665         /**
666          * Convert to a string.
667          * 
668          * @param tag If false, only _content is shown.
669          */
670         public synchronized String toString(boolean tag)
671         {
672             StringBuffer buf = new StringBuffer();
673             synchronized (buf)
674             {
675                 toString(buf, tag);
676                 return buf.toString();
677             }
678         }
679 
680         /* ------------------------------------------------------------ */
681         /**
682          * Convert to a string.
683          * 
684          * @param tag If false, only _content is shown.
685          */
686         public synchronized String toString(boolean tag, boolean trim)
687         {
688             String s = toString(tag);
689             if (s != null && trim)
690                 s = s.trim();
691             return s;
692         }
693 
694         /* ------------------------------------------------------------ */
695         private synchronized void toString(StringBuffer buf, boolean tag)
696         {
697             if (tag)
698             {
699                 buf.append("<");
700                 buf.append(_tag);
701 
702                 if (_attrs != null)
703                 {
704                     for (int i = 0; i < _attrs.length; i++)
705                     {
706                         buf.append(' ');
707                         buf.append(_attrs[i].getName());
708                         buf.append("=\"");
709                         buf.append(_attrs[i].getValue());
710                         buf.append("\"");
711                     }
712                 }
713             }
714 
715             if (_list != null)
716             {
717                 if (tag)
718                     buf.append(">");
719                 for (int i = 0; i < _list.size(); i++)
720                 {
721                     Object o = _list.get(i);
722                     if (o == null)
723                         continue;
724                     if (o instanceof Node)
725                         ((Node) o).toString(buf, tag);
726                     else
727                         buf.append(o.toString());
728                 }
729                 if (tag)
730                 {
731                     buf.append("</");
732                     buf.append(_tag);
733                     buf.append(">");
734                 }
735             }
736             else if (tag)
737                 buf.append("/>");
738         }
739 
740         /* ------------------------------------------------------------ */
741         /**
742          * Iterator over named child nodes.
743          * 
744          * @param tag The tag of the nodes.
745          * @return Iterator over all child nodes with the specified tag.
746          */
747         public Iterator iterator(final String tag)
748         {
749             return new Iterator()
750             {
751                 int c = 0;
752                 Node _node;
753 
754                 /* -------------------------------------------------- */
755                 public boolean hasNext()
756                 {
757                     if (_node != null)
758                         return true;
759                     while (_list != null && c < _list.size())
760                     {
761                         Object o = _list.get(c);
762                         if (o instanceof Node)
763                         {
764                             Node n = (Node) o;
765                             if (tag.equals(n._tag))
766                             {
767                                 _node = n;
768                                 return true;
769                             }
770                         }
771                         c++;
772                     }
773                     return false;
774                 }
775 
776                 /* -------------------------------------------------- */
777                 public Object next()
778                 {
779                     try
780                     {
781                         if (hasNext())
782                             return _node;
783                         throw new NoSuchElementException();
784                     }
785                     finally
786                     {
787                         _node = null;
788                         c++;
789                     }
790                 }
791 
792                 /* -------------------------------------------------- */
793                 public void remove()
794                 {
795                     throw new UnsupportedOperationException("Not supported");
796                 }
797             };
798         }
799     }
800 }