View Javadoc

1   // ========================================================================
2   // Copyright 2004-2005 Mort Bay Consulting Pty. Ltd.
3   // ------------------------------------------------------------------------
4   // Licensed under the Apache License, Version 2.0 (the "License");
5   // you may not use this file except in compliance with the License.
6   // You may obtain a copy of the License at 
7   // http://www.apache.org/licenses/LICENSE-2.0
8   // Unless required by applicable law or agreed to in writing, software
9   // distributed under the License is distributed on an "AS IS" BASIS,
10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11  // See the License for the specific language governing permissions and
12  // limitations under the License.
13  // ========================================================================
14  
15  package org.mortbay.util;
16  
17  import java.io.UnsupportedEncodingException;
18  
19  
20  
/* ------------------------------------------------------------ */
/** URI Holder.
 * This class assists with the decoding and encoding of HTTP URIs.
 * It differs from the java.net.URL class as it does not provide
 * communications ability, but it does assist with query string
 * formatting.
 * <P>UTF-8 encoding is used by default for % encoded characters. This
 * may be overridden with the org.mortbay.util.URI.charset system property.
 * @see UrlEncoded
 * @author Greg Wilkins (gregw)
 */
32  public class URIUtil
33      implements Cloneable
34  {
    /** The URI path separator, "/". */
    public static final String SLASH="/";
    /** The "http" scheme name, without the trailing colon. */
    public static final String HTTP="http";
    /** The "http" scheme name followed by its ':' delimiter. */
    public static final String HTTP_COLON="http:";
    /** The "https" scheme name, without the trailing colon. */
    public static final String HTTPS="https";
    /** The "https" scheme name followed by its ':' delimiter. */
    public static final String HTTPS_COLON="https:";

    // Use UTF-8 as per http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars
    /** Name of the charset used by the decodePath methods to turn % decoded
     * bytes into characters.  It is read once, when this class is
     * initialised, from the org.mortbay.util.URI.charset system property and
     * falls back to StringUtil.__UTF8 when that property is not set.
     */
    public static final String __CHARSET=System.getProperty("org.mortbay.util.URI.charset",StringUtil.__UTF8);
    
    /** Private constructor: every member declared in this class is static,
     * so no instance is ever needed.
     */
    private URIUtil()
    {}
46      
47      /* ------------------------------------------------------------ */
48      /** Encode a URI path.
49       * This is the same encoding offered by URLEncoder, except that
50       * the '/' character is not encoded.
51       * @param path The path the encode
52       * @return The encoded path
53       */
54      public static String encodePath(String path)
55      {
56          if (path==null || path.length()==0)
57              return path;
58          
59          StringBuffer buf = encodePath(null,path);
60          return buf==null?path:buf.toString();
61      }
62          
63      /* ------------------------------------------------------------ */
64      /** Encode a URI path.
65       * @param path The path the encode
66       * @param buf StringBuffer to encode path into (or null)
67       * @return The StringBuffer or null if no substitutions required.
68       */
69      public static StringBuffer encodePath(StringBuffer buf, String path)
70      {
71          if (buf==null)
72          {
73          loop:
74              for (int i=0;i<path.length();i++)
75              {
76                  char c=path.charAt(i);
77                  switch(c)
78                  {
79                      case '%':
80                      case '?':
81                      case ';':
82                      case '#':
83                      case '\'':
84                      case '"':
85                      case '<':
86                      case '>':
87                      case ' ':
88                          buf=new StringBuffer(path.length()<<1);
89                          break loop;
90                  }
91              }
92              if (buf==null)
93                  return null;
94          }
95          
96          synchronized(buf)
97          {
98              for (int i=0;i<path.length();i++)
99              {
100                 char c=path.charAt(i);       
101                 switch(c)
102                 {
103                   case '%':
104                       buf.append("%25");
105                       continue;
106                   case '?':
107                       buf.append("%3F");
108                       continue;
109                   case ';':
110                       buf.append("%3B");
111                       continue;
112                   case '#':
113                       buf.append("%23");
114                       continue;
115                   case '"':
116                       buf.append("%22");
117                       continue;
118                   case '\'':
119                       buf.append("%27");
120                       continue;
121                   case '<':
122                       buf.append("%3C");
123                       continue;
124                   case '>':
125                       buf.append("%3E");
126                       continue;
127                   case ' ':
128                       buf.append("%20");
129                       continue;
130                   default:
131                       buf.append(c);
132                       continue;
133                 }
134             }
135         }
136 
137         return buf;
138     }
139     
140     /* ------------------------------------------------------------ */
141     /** Encode a URI path.
142      * @param path The path the encode
143      * @param buf StringBuffer to encode path into (or null)
144      * @param encode String of characters to encode. % is always encoded.
145      * @return The StringBuffer or null if no substitutions required.
146      */
147     public static StringBuffer encodeString(StringBuffer buf,
148                                             String path,
149                                             String encode)
150     {
151         if (buf==null)
152         {
153         loop:
154             for (int i=0;i<path.length();i++)
155             {
156                 char c=path.charAt(i);
157                 if (c=='%' || encode.indexOf(c)>=0)
158                 {    
159                     buf=new StringBuffer(path.length()<<1);
160                     break loop;
161                 }
162             }
163             if (buf==null)
164                 return null;
165         }
166         
167         synchronized(buf)
168         {
169             for (int i=0;i<path.length();i++)
170             {
171                 char c=path.charAt(i);
172                 if (c=='%' || encode.indexOf(c)>=0)
173                 {
174                     buf.append('%');
175                     StringUtil.append(buf,(byte)(0xff&c),16);
176                 }
177                 else
178                     buf.append(c);
179             }
180         }
181 
182         return buf;
183     }
184     
185     /* ------------------------------------------------------------ */
186     /* Decode a URI path.
187      * @param path The path the encode
188      * @param buf StringBuffer to encode path into
189      */
190     public static String decodePath(String path)
191     {
192         if (path==null)
193             return null;
194         char[] chars=null;
195         int n=0;
196         byte[] bytes=null;
197         int b=0;
198         
199         int len=path.length();
200         
201         for (int i=0;i<len;i++)
202         {
203             char c = path.charAt(i);
204 
205             if (c=='%' && (i+2)<len)
206             {
207                 if (chars==null)
208                 {
209                     chars=new char[len];
210                     bytes=new byte[len];
211                     path.getChars(0,i,chars,0);
212                 }
213                 bytes[b++]=(byte)(0xff&TypeUtil.parseInt(path,i+1,2,16));
214                 i+=2;
215                 continue;
216             }
217             else if (bytes==null)
218             {
219                 n++;
220                 continue;
221             }
222             
223             if (b>0)
224             {
225                 String s;
226                 try
227                 {
228                     s=new String(bytes,0,b,__CHARSET);
229                 }
230                 catch (UnsupportedEncodingException e)
231                 {       
232                     s=new String(bytes,0,b);
233                 }
234                 s.getChars(0,s.length(),chars,n);
235                 n+=s.length();
236                 b=0;
237             }
238             
239             chars[n++]=c;
240         }
241 
242         if (chars==null)
243             return path;
244 
245         if (b>0)
246         {
247             String s;
248             try
249             {
250                 s=new String(bytes,0,b,__CHARSET);
251             }
252             catch (UnsupportedEncodingException e)
253             {       
254                 s=new String(bytes,0,b);
255             }
256             s.getChars(0,s.length(),chars,n);
257             n+=s.length();
258         }
259         
260         return new String(chars,0,n);
261     }
262     
263     /* ------------------------------------------------------------ */
264     /* Decode a URI path.
265      * @param path The path the encode
266      * @param buf StringBuffer to encode path into
267      */
268     public static String decodePath(byte[] buf, int offset, int length)
269     {
270         byte[] bytes=null;
271         int n=0;
272         
273         for (int i=0;i<length;i++)
274         {
275             byte b = buf[i + offset];
276             
277             if (b=='%' && (i+2)<length)
278             {
279                 b=(byte)(0xff&TypeUtil.parseInt(buf,i+offset+1,2,16));
280                 i+=2;
281             }
282             else if (bytes==null)
283             {
284                 n++;
285                 continue;
286             }
287             
288             if (bytes==null)
289             {
290                 bytes=new byte[length];
291                 for (int j=0;j<n;j++)
292                     bytes[j]=buf[j + offset];
293             }
294             
295             bytes[n++]=b;
296         }
297 
298         if (bytes==null)
299             return StringUtil.toString(buf,offset,length,__CHARSET);
300         return StringUtil.toString(bytes,0,n,__CHARSET);
301     }
302 
303     
304     /* ------------------------------------------------------------ */
305     /** Add two URI path segments.
306      * Handles null and empty paths, path and query params (eg ?a=b or
307      * ;JSESSIONID=xxx) and avoids duplicate '/'
308      * @param p1 URI path segment (should be encoded)
309      * @param p2 URI path segment (should be encoded)
310      * @return Legally combined path segments.
311      */
312     public static String addPaths(String p1, String p2)
313     {
314         if (p1==null || p1.length()==0)
315         {
316             if (p1!=null && p2==null)
317                 return p1;
318             return p2;
319         }
320         if (p2==null || p2.length()==0)
321             return p1;
322         
323         int split=p1.indexOf(';');
324         if (split<0)
325             split=p1.indexOf('?');
326         if (split==0)
327             return p2+p1;
328         if (split<0)
329             split=p1.length();
330 
331         StringBuffer buf = new StringBuffer(p1.length()+p2.length()+2);
332         buf.append(p1);
333         
334         if (buf.charAt(split-1)=='/')
335         {
336             if (p2.startsWith(URIUtil.SLASH))
337             {
338                 buf.deleteCharAt(split-1);
339                 buf.insert(split-1,p2);
340             }
341             else
342                 buf.insert(split,p2);
343         }
344         else
345         {
346             if (p2.startsWith(URIUtil.SLASH))
347                 buf.insert(split,p2);
348             else
349             {
350                 buf.insert(split,'/');
351                 buf.insert(split+1,p2);
352             }
353         }
354 
355         return buf.toString();
356     }
357     
358     /* ------------------------------------------------------------ */
359     /** Return the parent Path.
360      * Treat a URI like a directory path and return the parent directory.
361      */
362     public static String parentPath(String p)
363     {
364         if (p==null || URIUtil.SLASH.equals(p))
365             return null;
366         int slash=p.lastIndexOf('/',p.length()-2);
367         if (slash>=0)
368             return p.substring(0,slash+1);
369         return null;
370     }
371     
372     /* ------------------------------------------------------------ */
373     /** Strip parameters from a path.
374      * Return path upto any semicolon parameters.
375      */
376     public static String stripPath(String path)
377     {
378         if (path==null)
379             return null;
380         int semi=path.indexOf(';');
381         if (semi<0)
382             return path;
383         return path.substring(0,semi);
384     }
385     
    /* ------------------------------------------------------------ */
    /** Convert a path to a canonical form.
     * All instances of "." and ".." are factored out.  Null is returned
     * if the path tries to .. above its root.
     * <p>The path is examined segment by segment from its end towards its
     * front, with '/' as the only separator.  A first pass over the String
     * looks for a "." or ".." segment.  A second pass over a StringBuffer
     * copy deletes those segments, and for each ".." the segment that
     * precedes it.
     * @param path The path to convert; null and "" are returned unchanged.
     * @return The canonical path, or null if ".." segments are still
     * outstanding when the front of the path is reached.
     */
    public static String canonicalPath(String path)
    {
        if (path==null || path.length()==0)
            return path;

        // [start+1,end) is the segment under inspection; start is the index
        // of the '/' in front of it, or -1 if there is none.
        int end=path.length();
        int start = path.lastIndexOf('/', end);

        // Pass 1: scan backwards for the last "." or ".." segment.
    search:
        while (end>0)
        {
            // end-start is the segment length plus one.
            switch(end-start)
            {
              case 2: // possible single dot
                  if (path.charAt(start+1)!='.')
                      break;
                  break search;
              case 3: // possible double dot
                  if (path.charAt(start+1)!='.' || path.charAt(start+2)!='.')
                      break;
                  break search;
            }
            
            // Step to the previous segment.
            end=start;
            start=path.lastIndexOf('/',end-1);
        }

        // If we have checked the entire string
        // (start>=end can only hold when the scan above ended without
        // breaking out on a dot segment, because a break leaves end-start
        // at 2 or 3.)
        // NOTE(review): a dot-free path whose scan ends with end==0 and
        // start==-1 (e.g. "/a") is not caught here; the loop below is then
        // skipped and an equal copy of the path is returned.
        if (start>=end)
            return path;
        
        StringBuffer buf = new StringBuffer(path);
        int delStart=-1;  // start of the pending deletion in buf, -1 if none
        int delEnd=-1;    // end (exclusive) of the pending deletion, -1 if none
        int skip=0;       // ".." segments still waiting for a segment to cancel
        
        // Pass 2: continue backwards from the segment found above.
        while (end>0)
        {
            switch(end-start)
            {       
              case 2: // possible single dot
                  if (buf.charAt(start+1)!='.')
                  {
                      // An ordinary one character segment: it cancels one
                      // outstanding "..".  When that was the last one, the
                      // pending deletion is extended back to this segment.
                      if (skip>0 && --skip==0)
                      {   
                          delStart=start>=0?start:0;
                          if(delStart>0 && delEnd==buf.length() && buf.charAt(delEnd-1)=='.')
                              delStart++;
                      }
                      break;
                  }
                  
                  // A "." at the very front that is followed by "//" is not
                  // scheduled for deletion.
                  if(start<0 && buf.length()>2 && buf.charAt(1)=='/' && buf.charAt(2)=='/')
                      break;
                  
                  if(delEnd<0)
                      delEnd=end;
                  delStart=start;
                  if (delStart<0 || delStart==0&&buf.charAt(delStart)=='/')
                  {
                      // The "." is the first segment: keep any leading '/'
                      // and, if a '/' follows the deleted range, remove
                      // that one as well.
                      delStart++;
                      if (delEnd<buf.length() && buf.charAt(delEnd)=='/')
                          delEnd++;
                      break;
                  }
                  // The "." is the last segment: keep the '/' in front of it.
                  if (end==buf.length())
                      delStart++;
                  
                  // Step to the previous segment without deleting yet.
                  end=start--;
                  while (start>=0 && buf.charAt(start)!='/')
                      start--;
                  continue;
                  
              case 3: // possible double dot
                  if (buf.charAt(start+1)!='.' || buf.charAt(start+2)!='.')
                  {
                      // An ordinary two character segment: same handling as
                      // the ordinary one character segment above.
                      if (skip>0 && --skip==0)
                      {   delStart=start>=0?start:0;
                          if(delStart>0 && delEnd==buf.length() && buf.charAt(delEnd-1)=='.')
                              delStart++;
                      }
                      break;
                  }
                  
                  // A "..": extend the pending deletion over it and record
                  // that one more preceding segment has to be removed.
                  delStart=start;
                  if (delEnd<0)
                      delEnd=end;

                  skip++;
                  end=start--;
                  while (start>=0 && buf.charAt(start)!='/')
                      start--;
                  continue;

              default:
                  // Any other segment length: cancels one outstanding "..".
                  // Unlike the two cases above, delStart>0 is not checked
                  // before looking at delEnd here.
                  if (skip>0 && --skip==0)
                  {
                      delStart=start>=0?start:0;
                      if(delEnd==buf.length() && buf.charAt(delEnd-1)=='.')
                          delStart++;
                  }
            }     
            
            // Do the delete
            if (skip<=0 && delStart>=0 && delEnd>=delStart)
            {  
                buf.delete(delStart,delEnd);
                delStart=delEnd=-1;
                // NOTE(review): skip<=0 is guaranteed by the enclosing
                // condition, so this branch looks unreachable.
                if (skip>0)
                    delEnd=end;
            }
            
            // Step to the previous segment: end moves to the current '/',
            // start walks back to the '/' before it (or to -1).
            end=start--;
            while (start>=0 && buf.charAt(start)!='/')
                start--;
        }      

        // Too many ..
        if (skip>0)
            return null;
        
        // Do the delete
        if (delEnd>=0)
            buf.delete(delStart,delEnd);

        return buf.toString();
    }
520 
521     /* ------------------------------------------------------------ */
522     /** Convert a path to a compact form.
523      * All instances of "//" and "///" etc. are factored out to single "/" 
524      * @param path 
525      * @return path
526      */
527     public static String compactPath(String path)
528     {
529         if (path==null || path.length()==0)
530             return path;
531 
532         int state=0;
533         int end=path.length();
534         int i=0;
535         
536         loop:
537         while (i<end)
538         {
539             char c=path.charAt(i);
540             switch(c)
541             {
542                 case '?':
543                     return path;
544                 case '/':
545                     state++;
546                     if (state==2)
547                         break loop;
548                     break;
549                 default:
550                     state=0;
551             }
552             i++;
553         }
554         
555         if (state<2)
556             return path;
557         
558         StringBuffer buf = new StringBuffer(path.length());
559         char[] chars = path.toCharArray();
560         buf.append(chars,0,i);
561         
562         loop2:
563         while (i<end)
564         {
565             char c=path.charAt(i);
566             switch(c)
567             {
568                 case '?':
569                     buf.append(chars,i,end-i);
570                     break loop2;
571                 case '/':
572                     if (state++==0)
573                         buf.append(c);
574                     break;
575                 default:
576                     state=0;
577                     buf.append(c);
578             }
579             i++;
580         }
581         
582         return buf.toString();
583     }
584 
585     /* ------------------------------------------------------------ */
586     /** 
587      * @param uri URI
588      * @return True if the uri has a scheme
589      */
590     public static boolean hasScheme(String uri)
591     {
592         for (int i=0;i<uri.length();i++)
593         {
594             char c=uri.charAt(i);
595             if (c==':')
596                 return true;
597             if (!(c>='a'&&c<='z' ||
598                   c>='A'&&c<='Z' ||
599                   (i>0 &&(c>='0'&&c<='9' ||
600                           c=='.' ||
601                           c=='+' ||
602                           c=='-'))
603                   ))
604                 break;
605         }
606         return false;
607     }
608     
609 }
610 
611 
612