View Javadoc

1   //========================================================================
2   //Copyright 2006 Mort Bay Consulting Pty. Ltd.
3   //------------------------------------------------------------------------
4   //Licensed under the Apache License, Version 2.0 (the "License");
5   //you may not use this file except in compliance with the License.
6   //You may obtain a copy of the License at 
7   //http://www.apache.org/licenses/LICENSE-2.0
8   //Unless required by applicable law or agreed to in writing, software
9   //distributed under the License is distributed on an "AS IS" BASIS,
10  //WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11  //See the License for the specific language governing permissions and
12  //limitations under the License.
13  //========================================================================
14  
15  package org.mortbay.jetty;
16  
17  import java.io.UnsupportedEncodingException;
18  
19  import org.mortbay.util.MultiMap;
20  import org.mortbay.util.StringUtil;
21  import org.mortbay.util.TypeUtil;
22  import org.mortbay.util.URIUtil;
23  import org.mortbay.util.UrlEncoded;
24  import org.mortbay.util.Utf8StringBuffer;
25  
26  
27  /* ------------------------------------------------------------ */
28  /** Http URI.
29   * Parse a HTTP URI from a string or byte array.  Given a URI
30   * <code>http://user@host:port/path/info;param?query#fragment</code>
31   * this class will split it into the following undecoded optional elements:<ul>
32   * <li>{@link #getScheme()} - http:</li>
33   * <li>{@link #getAuthority()} - //name@host:port</li>
34   * <li>{@link #getHost()} - host</li>
35   * <li>{@link #getPort()} - port</li>
36   * <li>{@link #getPath()} - /path/info</li>
37   * <li>{@link #getParam()} - param</li>
38   * <li>{@link #getQuery()} - query</li>
39   * <li>{@link #getFragment()} - fragment</li>
40   * </ul>
41   * 
42   */
43  public class HttpURI
44  {
45      private static byte[] __empty={}; 
46      private final static int 
47      START=0,
48      AUTH_OR_PATH=1,
49      SCHEME_OR_PATH=2,
50      AUTH=4,
51      IPV6=5,
52      PORT=6,
53      PATH=7,
54      PARAM=8,
55      QUERY=9,
56      ASTERISK=10;
57      
58      boolean _partial=false;
59      byte[] _raw=__empty;
60      String _rawString;
61      int _scheme;
62      int _authority;
63      int _host;
64      int _port;
65      int _path;
66      int _param;
67      int _query;
68      int _fragment;
69      int _end;
70      
71      Utf8StringBuffer _utf8b = new Utf8StringBuffer(64);
72      
73      public HttpURI()
74      {
75          
76      } 
77      
78      /* ------------------------------------------------------------ */
79      /**
80       * @param parsePartialAuth If True, parse auth without prior scheme, else treat all URIs starting with / as paths
81       */
82      public HttpURI(boolean parsePartialAuth)
83      {
84          _partial=parsePartialAuth;
85      }
86      
87      public HttpURI(String raw)
88      {
89          _rawString=raw;
90          byte[] b = raw.getBytes();
91          parse(b,0,b.length);
92      }
93      
94      public HttpURI(byte[] raw,int offset, int length)
95      {
96          parse2(raw,offset,length);
97      }
98      
99      public void parse(String raw)
100     {
101         byte[] b = raw.getBytes();
102         parse2(b,0,b.length);
103         _rawString=raw;
104     }
105     
106     public void parse(byte[] raw,int offset, int length)
107     {
108         _rawString=null;
109         parse2(raw,offset,length);
110     }
111     
112     private void parse2(byte[] raw,int offset, int length)
113     {
114         _raw=raw;
115         int i=offset;
116         int e=offset+length;
117         int state=START;
118         int m=offset;
119         _end=offset+length;
120         _scheme=offset;
121         _authority=offset;
122         _host=offset;
123         _port=offset;
124         _path=offset;
125         _param=_end;
126         _query=_end;
127         _fragment=_end;
128         while (i<e)
129         {
130             char c=(char)(0xff&_raw[i]);
131             int s=i++;
132             
133             state: switch (state)
134             {
135                 case START:
136                 {
137                     m=s;
138                     switch(c)
139                     {
140                         case '/':
141                             state=AUTH_OR_PATH;
142                             break;
143                         case ';':
144                             _param=s;
145                             state=PARAM;
146                             break;
147                         case '?':
148                             _param=s;
149                             _query=s;
150                             state=QUERY;
151                             break;
152                         case '#':
153                             _param=s;
154                             _query=s;
155                             _fragment=s;
156                             break;
157                         case '*':
158                             _path=s;
159                             state=ASTERISK;
160                             break;
161                             
162                         default:
163                             if (Character.isLetterOrDigit(c))
164                                 state=SCHEME_OR_PATH;
165                             else
166                                 throw new IllegalArgumentException(StringUtil.toString(_raw,offset,length,URIUtil.__CHARSET));
167                     }
168                     
169                     continue;
170                 }
171 
172                 case AUTH_OR_PATH:
173                 {
174                     if ((_partial||_scheme!=_authority) && c=='/')
175                     {
176                         _host=i;
177                         _port=_end;
178                         _path=_end;
179                         state=AUTH;
180                     }
181                     else if (c==';' || c=='?' || c=='#')
182                     {
183                         i--;
184                         state=PATH;
185                     }  
186                     else
187                     {
188                         _host=m;
189                         _port=m;
190                         state=PATH;
191                     }  
192                     continue;
193                 }
194                 
195                 case SCHEME_OR_PATH:
196                 {
197                     // short cut for http and https
198                     if (length>6 && c=='t')
199                     {
200                         if (_raw[offset+3]==':')
201                         {
202                             s=offset+3;
203                             i=offset+4;
204                             c=':';
205                         }
206                         else if (_raw[offset+4]==':')
207                         {
208                             s=offset+4;
209                             i=offset+5;
210                             c=':';
211                         }
212                         else if (_raw[offset+5]==':')
213                         {
214                             s=offset+5;
215                             i=offset+6;
216                             c=':';
217                         }
218                     }
219                     
220                     switch (c)
221                     {
222                         case ':':
223                         {
224                             m = i++;
225                             _authority = m;
226                             _path = m;
227                             c = (char)(0xff & _raw[i]);
228                             if (c == '/')
229                                 state = AUTH_OR_PATH;
230                             else
231                             {
232                                 _host = m;
233                                 _port = m;
234                                 state = PATH;
235                             }
236                             break;
237                         }
238                         
239                         case '/':
240                         {
241                             state = PATH;
242                             break;
243                         }
244                         
245                         case ';':
246                         {
247                             _param = s;
248                             state = PARAM;
249                             break;
250                         }
251                         
252                         case '?':
253                         {
254                             _param = s;
255                             _query = s;
256                             state = QUERY;
257                             break;
258                         }
259                         
260                         case '#':
261                         {
262                             _param = s;
263                             _query = s;
264                             _fragment = s;
265                             break;
266                         }
267                     }
268                     continue;
269                 }
270                 
271                 case AUTH:
272                 {
273                     switch (c)
274                     {
275 
276                         case '/':
277                         {
278                             m = s;
279                             _path = m;
280                             _port = _path;
281                             state = PATH;
282                             break;
283                         }
284                         case '@':
285                         {
286                             _host = i;
287                             break;
288                         }
289                         case ':':
290                         {
291                             _port = s;
292                             state = PORT;
293                             break;
294                         }
295                         case '[':
296                         {
297                             state = IPV6;
298                             break;
299                         }
300                     }
301                     continue;
302                 }
303 
304                 case IPV6:
305                 {
306                     switch (c)
307                     {
308                         case '/':
309                         {
310                             throw new IllegalArgumentException("No closing ']' for " + StringUtil.toString(_raw,offset,length,URIUtil.__CHARSET));
311                         }
312                         case ']':
313                         {
314                             state = AUTH;
315                             break;
316                         }
317                     }
318 
319                     continue;
320                 }
321                 
322                 case PORT:
323                 {
324                     if (c=='/')
325                     {
326                         m=s;
327                         _path=m;
328                         if (_port<=_authority)
329                             _port=_path;
330                         state=PATH;
331                     }
332                     continue;
333                 }
334                 
335                 case PATH:
336                 {
337                     switch (c)
338                     {
339                         case ';':
340                         {
341                             _param = s;
342                             state = PARAM;
343                             break;
344                         }
345                         case '?':
346                         {
347                             _param = s;
348                             _query = s;
349                             state = QUERY;
350                             break;
351                         }
352                         case '#':
353                         {
354                             _param = s;
355                             _query = s;
356                             _fragment = s;
357                             break state;
358                         }
359                     }
360                     continue;
361                 }
362                 
363                 case PARAM:
364                 {
365                     switch (c)
366                     {
367                         case '?':
368                         {
369                             _query = s;
370                             state = QUERY;
371                             break;
372                         }
373                         case '#':
374                         {
375                             _query = s;
376                             _fragment = s;
377                             break state;
378                         }
379                     }
380                     continue;
381                 }
382                 
383                 case QUERY:
384                 {
385                     if (c=='#')
386                     {
387                         _fragment=s;
388                         break state;
389                     }
390                     continue;
391                 }
392                 
393                 case ASTERISK:
394                 {
395                     throw new IllegalArgumentException("only '*'");
396                 }
397             }
398         }
399     }
400     
401     private String toUtf8String(int offset,int length)
402     {
403         _utf8b.reset();
404         _utf8b.append(_raw,offset,length);
405         return _utf8b.toString();
406     }
407     
408     public String getScheme()
409     {
410         if (_scheme==_authority)
411             return null;
412         int l=_authority-_scheme;
413         if (l==5 && 
414             _raw[_scheme]=='h' && 
415             _raw[_scheme+1]=='t' && 
416             _raw[_scheme+2]=='t' && 
417             _raw[_scheme+3]=='p' )
418             return HttpSchemes.HTTP;
419         if (l==6 && 
420             _raw[_scheme]=='h' && 
421             _raw[_scheme+1]=='t' && 
422             _raw[_scheme+2]=='t' && 
423             _raw[_scheme+3]=='p' && 
424             _raw[_scheme+4]=='s' )
425             return HttpSchemes.HTTPS;
426         
427         return toUtf8String(_scheme,_authority-_scheme-1);
428     }
429     
430     public String getAuthority()
431     {
432         if (_authority==_path)
433             return null;
434         return toUtf8String(_authority,_path-_authority);
435     }
436     
437     public String getHost()
438     {
439         if (_host==_port)
440             return null;
441         return toUtf8String(_host,_port-_host);
442     }
443     
444     public int getPort()
445     {
446         if (_port==_path)
447             return -1;
448         return TypeUtil.parseInt(_raw, _port+1, _path-_port-1,10);
449     }
450     
451     public String getPath()
452     {
453         if (_path==_param)
454             return null;
455         return toUtf8String(_path,_param-_path);
456     }
457     
458     public String getDecodedPath()
459     {
460         if (_path==_param)
461             return null;
462 
463         int length = _param-_path;
464         byte[] bytes=null;
465         int n=0;
466 
467         for (int i=_path;i<_param;i++)
468         {
469             byte b = _raw[i];
470             
471             if (b=='%' && (i+2)<_param)
472             {
473                 b=(byte)(0xff&TypeUtil.parseInt(_raw,i+1,2,16));
474                 i+=2;
475             }
476             else if (bytes==null)
477             {
478                 n++;
479                 continue;
480             }
481             
482             if (bytes==null)
483             {
484                 bytes=new byte[length];
485                 for (int j=0;j<n;j++)
486                     bytes[j]=_raw[_path+j];
487             }
488             
489             bytes[n++]=b;
490         }
491 
492         if (bytes==null)
493             return toUtf8String(_path,length);
494 
495         _utf8b.reset();
496         _utf8b.append(bytes,0,n);
497         return _utf8b.toString();
498     }
499     
500     public String getPathAndParam()
501     {
502         if (_path==_query)
503             return null;
504         return toUtf8String(_path,_query-_path);
505     }
506     
507     public String getCompletePath()
508     {
509         if (_path==_end)
510             return null;
511         return toUtf8String(_path,_end-_path);
512     }
513     
514     public String getParam()
515     {
516         if (_param==_query)
517             return null;
518         return toUtf8String(_param+1,_query-_param-1);
519     }
520     
521     public String getQuery()
522     {
523         if (_query==_fragment)
524             return null;
525         return toUtf8String(_query+1,_fragment-_query-1);
526     }
527     
528     public String getQuery(String encoding)
529     {
530         if (_query==_fragment)
531             return null;
532         return StringUtil.toString(_raw,_query+1,_fragment-_query-1,encoding);
533     }
534     
535     public boolean hasQuery()
536     {
537         return (_fragment>_query);
538     }
539     
540     public String getFragment()
541     {
542         if (_fragment==_end)
543             return null;
544         return toUtf8String(_fragment+1,_end-_fragment-1);
545     }
546 
547     public void decodeQueryTo(MultiMap parameters) 
548     {
549         if (_query==_fragment)
550             return;
551         _utf8b.reset();
552         UrlEncoded.decodeUtf8To(_raw,_query+1,_fragment-_query-1,parameters,_utf8b);
553     }
554 
555     public void decodeQueryTo(MultiMap parameters, String encoding) 
556         throws UnsupportedEncodingException
557     {
558         if (_query==_fragment)
559             return;
560        
561         if (encoding==null || StringUtil.isUTF8(encoding))
562             UrlEncoded.decodeUtf8To(_raw,_query+1,_fragment-_query-1,parameters);
563         else
564             UrlEncoded.decodeTo(toUtf8String(_query+1,_fragment-_query-1),parameters,encoding);
565     }
566 
567     public void clear()
568     {
569         _scheme=_authority=_host=_port=_path=_param=_query=_fragment=_end=0;
570         _raw=__empty;
571         _rawString="";
572     }
573     
574     public String toString()
575     {
576         if (_rawString==null)
577             _rawString=toUtf8String(_scheme,_end-_scheme);
578         return _rawString;
579     }
580     
581     public void writeTo(Utf8StringBuffer buf)
582     {
583         buf.append(_raw,_scheme,_end-_scheme);
584     }
585     
586 }