View Javadoc

1   // ========================================================================
2   // Copyright 2004-2005 Mort Bay Consulting Pty. Ltd.
3   // ------------------------------------------------------------------------
4   // Licensed under the Apache License, Version 2.0 (the "License");
5   // you may not use this file except in compliance with the License.
6   // You may obtain a copy of the License at 
7   // http://www.apache.org/licenses/LICENSE-2.0
8   // Unless required by applicable law or agreed to in writing, software
9   // distributed under the License is distributed on an "AS IS" BASIS,
10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11  // See the License for the specific language governing permissions and
12  // limitations under the License.
13  // ========================================================================
14  
15  package org.mortbay.util;
16  
17  import java.io.IOException;
18  import java.io.InputStream;
19  import java.io.InputStreamReader;
20  import java.io.UnsupportedEncodingException;
21  import java.util.Iterator;
22  import java.util.Map;
23  
24  
25  /* ------------------------------------------------------------ */
26  /** Handles coding of MIME  "x-www-form-urlencoded".
27   * This class handles the encoding and decoding for either
28   * the query string of a URL or the content of a POST HTTP request.
29   *
30   * <p><h4>Notes</h4>
31   * The hashtable either contains String single values, vectors
32   * of String or arrays of Strings.
33   * <p>
34   * The UTF-8 charset is assumed, unless otherwise defined by either
35   * passing a parameter or setting the "org.mortbay.util.UrlEncoding.charset"
36   * System property.
37   * <p>
38   * This class is only partially synchronized.  In particular, simple
39   * get operations are not protected from concurrent updates.
40   *
41   * @see java.net.URLEncoder
42   */
43  public class UrlEncoded extends MultiMap
44  {
45      public static final String ENCODING = System.getProperty("org.mortbay.util.UrlEncoding.charset",StringUtil.__UTF8);
46      
47      /* ----------------------------------------------------------------- */
48      public UrlEncoded(UrlEncoded url)
49      {
50          super(url);
51      }
52      
53      /* ----------------------------------------------------------------- */
54      public UrlEncoded()
55      {
56          super(6);
57      }
58      
59      /* ----------------------------------------------------------------- */
60      public UrlEncoded(String s)
61      {
62          super(6);
63          decode(s,ENCODING);
64      }
65      
66      /* ----------------------------------------------------------------- */
67      public UrlEncoded(String s, String charset)
68      {
69          super(6);
70          decode(s,charset);
71      }
72      
73      /* ----------------------------------------------------------------- */
74      public void decode(String query)
75      {
76          decodeTo(query,this,ENCODING);
77      }
78      
79      /* ----------------------------------------------------------------- */
80      public void decode(String query,String charset)
81      {
82          decodeTo(query,this,charset);
83      }
84      
85      /* -------------------------------------------------------------- */
86      /** Encode Hashtable with % encoding.
87       */
88      public String encode()
89      {
90          return encode(ENCODING,false);
91      }
92      
93      /* -------------------------------------------------------------- */
94      /** Encode Hashtable with % encoding.
95       */
96      public String encode(String charset)
97      {
98          return encode(charset,false);
99      }
100     
101     /* -------------------------------------------------------------- */
102     /** Encode Hashtable with % encoding.
103      * @param equalsForNullValue if True, then an '=' is always used, even
104      * for parameters without a value. e.g. "blah?a=&b=&c=".
105      */
106     public synchronized String encode(String charset, boolean equalsForNullValue)
107     {
108         return encode(this,charset,equalsForNullValue);
109     }
110     
111     /* -------------------------------------------------------------- */
112     /** Encode Hashtable with % encoding.
113      * @param equalsForNullValue if True, then an '=' is always used, even
114      * for parameters without a value. e.g. "blah?a=&b=&c=".
115      */
116     public static String encode(MultiMap map, String charset, boolean equalsForNullValue)
117     {
118         if (charset==null)
119             charset=ENCODING;
120         
121         StringBuffer result = new StringBuffer(128);
122         synchronized(result)
123         {
124             Iterator iter = map.entrySet().iterator();
125             while(iter.hasNext())
126             {
127                 Map.Entry entry = (Map.Entry)iter.next();
128                 
129                 String key = entry.getKey().toString();
130                 Object list = entry.getValue();
131                 int s=LazyList.size(list);
132                 
133                 if (s==0)
134                 {
135                     result.append(encodeString(key,charset));
136                     if(equalsForNullValue)
137                         result.append('=');
138                 }
139                 else
140                 {
141                     for (int i=0;i<s;i++)
142                     {
143                         if (i>0)
144                             result.append('&');
145                         Object val=LazyList.get(list,i);
146                         result.append(encodeString(key,charset));
147 
148                         if (val!=null)
149                         {
150                             String str=val.toString();
151                             if (str.length()>0)
152                             {
153                                 result.append('=');
154                                 result.append(encodeString(str,charset));
155                             }
156                             else if (equalsForNullValue)
157                                 result.append('=');
158                         }
159                         else if (equalsForNullValue)
160                             result.append('=');
161                     }
162                 }
163                 if (iter.hasNext())
164                     result.append('&');
165             }
166             return result.toString();
167         }
168     }
169 
170 
171     /* -------------------------------------------------------------- */
172     /** Decoded parameters to Map.
173      * @param content the string containing the encoded parameters
174      */
175     public static void decodeTo(String content, MultiMap map, String charset)
176     {
177         if (charset==null)
178             charset=ENCODING;
179 
180         synchronized(map)
181         {
182             String key = null;
183             String value = null;
184             int mark=-1;
185             boolean encoded=false;
186             for (int i=0;i<content.length();i++)
187             {
188                 char c = content.charAt(i);
189                 switch (c)
190                 {
191                   case '&':
192                       int l=i-mark-1;
193                       value = l==0?"":
194                           (encoded?decodeString(content,mark+1,l,charset):content.substring(mark+1,i));
195                       mark=i;
196                       encoded=false;
197                       if (key != null)
198                       {
199                           map.add(key,value);
200                       }
201                       else if (value!=null&&value.length()>0)
202                       {
203                           map.add(value,"");
204                       }
205                       key = null;
206                       value=null;
207                       break;
208                   case '=':
209                       if (key!=null)
210                           break;
211                       key = encoded?decodeString(content,mark+1,i-mark-1,charset):content.substring(mark+1,i);
212                       mark=i;
213                       encoded=false;
214                       break;
215                   case '+':
216                       encoded=true;
217                       break;
218                   case '%':
219                       encoded=true;
220                       break;
221                 }                
222             }
223             
224             if (key != null)
225             {
226                 int l=content.length()-mark-1;
227                 value = l==0?"":(encoded?decodeString(content,mark+1,l,charset):content.substring(mark+1));
228                 map.add(key,value);
229             }
230             else if (mark<content.length())
231             {
232                 key = encoded
233                     ?decodeString(content,mark+1,content.length()-mark-1,charset)
234                     :content.substring(mark+1);
235                 map.add(key,"");
236             }
237         }
238     }
239 
240     /* -------------------------------------------------------------- */
241     /** Decoded parameters to Map.
242      * @param data the byte[] containing the encoded parameters
243      */
244     public static void decodeUtf8To(byte[] raw,int offset, int length, MultiMap map)
245     {
246         decodeUtf8To(raw,offset,length,map,new Utf8StringBuffer());
247     }
248 
249     /* -------------------------------------------------------------- */
250     /** Decoded parameters to Map.
251      * @param data the byte[] containing the encoded parameters
252      */
253     public static void decodeUtf8To(byte[] raw,int offset, int length, MultiMap map,Utf8StringBuffer buffer)
254     {
255         synchronized(map)
256         {
257             String key = null;
258             String value = null;
259             
260             // TODO cache of parameter names ???
261             int end=offset+length;
262             for (int i=offset;i<end;i++)
263             {
264                 byte b=raw[i];
265                 switch ((char)(0xff&b))
266                 {
267                     case '&':
268                         value = buffer.length()==0?"":buffer.toString();
269                         buffer.reset();
270                         if (key != null)
271                         {
272                             map.add(key,value);
273                         }
274                         else if (value!=null&&value.length()>0)
275                         {
276                             map.add(value,"");
277                         }
278                         key = null;
279                         value=null;
280                         break;
281                         
282                     case '=':
283                         if (key!=null)
284                         {
285                             buffer.append(b);
286                             break;
287                         }
288                         key = buffer.toString();
289                         buffer.reset();
290                         break;
291                         
292                     case '+':
293                         buffer.append((byte)' ');
294                         break;
295                         
296                     case '%':
297                         if (i+2<end)
298                             buffer.append((byte)((TypeUtil.convertHexDigit(raw[++i])<<4) + TypeUtil.convertHexDigit(raw[++i])));
299                         break;
300                     default:
301                         buffer.append(b);
302                     break;
303                 }
304             }
305             
306             if (key != null)
307             {
308                 value = buffer.length()==0?"":buffer.toString();
309                 buffer.reset();
310                 map.add(key,value);
311             }
312             else if (buffer.length()>0)
313             {
314                 map.add(buffer.toString(),"");
315             }
316         }
317     }
318 
319     /* -------------------------------------------------------------- */
320     /** Decoded parameters to Map.
321      * @param in InputSteam to read
322      * @param map MultiMap to add parameters to
323      * @param maxLength maximum length of content to read 0r -1 for no limit
324      */
325     public static void decode88591To(InputStream in, MultiMap map, int maxLength)
326     throws IOException
327     {
328         synchronized(map)
329         {
330             StringBuffer buffer = new StringBuffer();
331             String key = null;
332             String value = null;
333             
334             int b;
335 
336             // TODO cache of parameter names ???
337             int totalLength=0;
338             while ((b=in.read())>=0)
339             {
340                 switch ((char) b)
341                 {
342                     case '&':
343                         value = buffer.length()==0?"":buffer.toString();
344                         buffer.setLength(0);
345                         if (key != null)
346                         {
347                             map.add(key,value);
348                         }
349                         else if (value!=null&&value.length()>0)
350                         {
351                             map.add(value,"");
352                         }
353                         key = null;
354                         value=null;
355                         break;
356                         
357                     case '=':
358                         if (key!=null)
359                         {
360                             buffer.append((char)b);
361                             break;
362                         }
363                         key = buffer.toString();
364                         buffer.setLength(0);
365                         break;
366                         
367                     case '+':
368                         buffer.append((char)' ');
369                         break;
370                         
371                     case '%':
372                         int dh=in.read();
373                         int dl=in.read();
374                         if (dh<0||dl<0)
375                             break;
376                         buffer.append((char)((TypeUtil.convertHexDigit((byte)dh)<<4) + TypeUtil.convertHexDigit((byte)dl)));
377                         break;
378                     default:
379                         buffer.append((char)b);
380                     break;
381                 }
382                 if (maxLength>=0 && (++totalLength > maxLength))
383                     throw new IllegalStateException("Form too large");
384             }
385             
386             if (key != null)
387             {
388                 value = buffer.length()==0?"":buffer.toString();
389                 buffer.setLength(0);
390                 map.add(key,value);
391             }
392             else if (buffer.length()>0)
393             {
394                 map.add(buffer.toString(), "");
395             }
396         }
397     }
398     
399     /* -------------------------------------------------------------- */
400     /** Decoded parameters to Map.
401      * @param in InputSteam to read
402      * @param map MultiMap to add parameters to
403      * @param maxLength maximum length of content to read 0r -1 for no limit
404      */
405     public static void decodeUtf8To(InputStream in, MultiMap map, int maxLength)
406     throws IOException
407     {
408         synchronized(map)
409         {
410             Utf8StringBuffer buffer = new Utf8StringBuffer();
411             String key = null;
412             String value = null;
413             
414             int b;
415             
416             // TODO cache of parameter names ???
417             int totalLength=0;
418             while ((b=in.read())>=0)
419             {
420                 switch ((char) b)
421                 {
422                     case '&':
423                         value = buffer.length()==0?"":buffer.toString();
424                         buffer.reset();
425                         if (key != null)
426                         {
427                             map.add(key,value);
428                         }
429                         else if (value!=null&&value.length()>0)
430                         {
431                             map.add(value,"");
432                         }
433                         key = null;
434                         value=null;
435                         break;
436                         
437                     case '=':
438                         if (key!=null)
439                         {
440                             buffer.append((byte)b);
441                             break;
442                         }
443                         key = buffer.toString();
444                         buffer.reset();
445                         break;
446                         
447                     case '+':
448                         buffer.append((byte)' ');
449                         break;
450                         
451                     case '%':
452                         int dh=in.read();
453                         int dl=in.read();
454                         if (dh<0||dl<0)
455                             break;
456                         buffer.append((byte)((TypeUtil.convertHexDigit((byte)dh)<<4) + TypeUtil.convertHexDigit((byte)dl)));
457                         break;
458                     default:
459                         buffer.append((byte)b);
460                     break;
461                 }
462                 if (maxLength>=0 && (++totalLength > maxLength))
463                     throw new IllegalStateException("Form too large");
464             }
465             
466             if (key != null)
467             {
468                 value = buffer.length()==0?"":buffer.toString();
469                 buffer.reset();
470                 map.add(key,value);
471             }
472             else if (buffer.length()>0)
473             {
474                 map.add(buffer.toString(), "");
475             }
476         }
477     }
478     
479     /* -------------------------------------------------------------- */
480     public static void decodeUtf16To(InputStream in, MultiMap map, int maxLength) throws IOException
481     {
482         InputStreamReader input = new InputStreamReader(in,StringUtil.__UTF16);
483         StringBuffer buf = new StringBuffer();
484 
485         int c;
486         int length=0;
487         if (maxLength<0)
488             maxLength=Integer.MAX_VALUE;
489         while ((c=input.read())>0 && length++<maxLength)
490             buf.append((char)c);
491         decodeTo(buf.toString(),map,ENCODING);
492     }
493     
494     /* -------------------------------------------------------------- */
495     /** Decoded parameters to Map.
496      * @param in the stream containing the encoded parameters
497      */
498     public static void decodeTo(InputStream in, MultiMap map, String charset, int maxLength)
499     throws IOException
500     {
501 
502         if (charset==null || StringUtil.__UTF8.equalsIgnoreCase(charset))
503         {
504             decodeUtf8To(in,map,maxLength);
505             return;
506         }
507         
508         if (StringUtil.__ISO_8859_1.equals(charset))
509         {
510             decode88591To(in,map,maxLength);
511             return;
512         }
513 
514         if (StringUtil.__UTF16.equalsIgnoreCase(charset)) // Should be all 2 byte encodings
515         {
516             decodeUtf16To(in,map,maxLength);
517             return;
518         }
519         
520 
521         synchronized(map)
522         {
523             String key = null;
524             String value = null;
525             
526             int c;
527             int digit=0;
528             int digits=0;
529             
530             int totalLength = 0;
531             ByteArrayOutputStream2 output = new ByteArrayOutputStream2();
532             
533             int size=0;
534             
535             while ((c=in.read())>0)
536             {
537                 switch ((char) c)
538                 {
539                     case '&':
540                         size=output.size();
541                         value = size==0?"":output.toString(charset);
542                         output.setCount(0);
543                         if (key != null)
544                         {
545                             map.add(key,value);
546                         }
547                         else if (value!=null&&value.length()>0)
548                         {
549                             map.add(value,"");
550                         }
551                         key = null;
552                         value=null;
553                         break;
554                     case '=':
555                         if (key!=null)
556                         {
557                             output.write(c);
558                             break;
559                         }
560                         size=output.size();
561                         key = size==0?"":output.toString(charset);
562                         output.setCount(0);
563                         break;
564                     case '+':
565                         output.write(' ');
566                         break;
567                     case '%':
568                         digits=2;
569                         break;
570                     default:
571                         if (digits==2)
572                         {
573                             digit=TypeUtil.convertHexDigit((byte)c);
574                             digits=1;
575                         }
576                         else if (digits==1)
577                         {
578                             output.write((digit<<4) + TypeUtil.convertHexDigit((byte)c));
579                             digits=0;
580                         }
581                         else
582                             output.write(c);
583                     break;
584                 }
585                 
586                 totalLength++;
587                 if (maxLength>=0 && totalLength > maxLength)
588                     throw new IllegalStateException("Form too large");
589             }
590 
591             size=output.size();
592             if (key != null)
593             {
594                 value = size==0?"":output.toString(charset);
595                 output.setCount(0);
596                 map.add(key,value);
597             }
598             else if (size>0)
599                 map.add(output.toString(charset),"");
600         }
601     }
602     
603     /* -------------------------------------------------------------- */
604     /** Decode String with % encoding.
605      * This method makes the assumption that the majority of calls
606      * will need no decoding.
607      */
608     public static String decodeString(String encoded,int offset,int length,String charset)
609     {
610         if (charset==null || StringUtil.isUTF8(charset))
611         {
612             Utf8StringBuffer buffer=null;
613 
614             for (int i=0;i<length;i++)
615             {
616                 char c = encoded.charAt(offset+i);
617                 if (c<0||c>0xff)
618                 {
619                     if (buffer==null)
620                     {
621                         buffer=new Utf8StringBuffer(length);
622                         buffer.getStringBuffer().append(encoded.substring(offset,offset+i+1));
623                     }
624                     else
625                         buffer.getStringBuffer().append(c);
626                 }
627                 else if (c=='+')
628                 {
629                     if (buffer==null)
630                     {
631                         buffer=new Utf8StringBuffer(length);
632                         buffer.getStringBuffer().append(encoded.substring(offset,offset+i));
633                     }
634                     
635                     buffer.getStringBuffer().append(' ');
636                 }
637                 else if (c=='%' && (i+2)<length)
638                 {
639                     if (buffer==null)
640                     {
641                         buffer=new Utf8StringBuffer(length);
642                         buffer.getStringBuffer().append(encoded.substring(offset,offset+i));
643                     }
644 
645                     while(c=='%' && (i+2)<length)
646                     {
647                         try
648                         {
649                             byte b=(byte)TypeUtil.parseInt(encoded,offset+i+1,2,16);
650                             buffer.append(b);
651                             i+=3;
652                         }
653                         catch(NumberFormatException nfe)
654                         {
655                             buffer.getStringBuffer().append('%');
656                             for(char next; ((next=encoded.charAt(++i+offset))!='%');)
657                                 buffer.getStringBuffer().append((next=='+' ? ' ' : next));
658                         }
659 
660                         if (i<length)
661                             c = encoded.charAt(offset+i);
662                     }
663                     i--;
664                 }
665                 else if (buffer!=null)
666                     buffer.getStringBuffer().append(c);
667             }
668 
669             if (buffer==null)
670             {
671                 if (offset==0 && encoded.length()==length)
672                     return encoded;
673                 return encoded.substring(offset,offset+length);
674             }
675 
676             return buffer.toString();
677         }
678         else
679         {
680             StringBuffer buffer=null;
681 
682             try
683             {
684                 for (int i=0;i<length;i++)
685                 {
686                     char c = encoded.charAt(offset+i);
687                     if (c<0||c>0xff)
688                     {
689                         if (buffer==null)
690                         {
691                             buffer=new StringBuffer(length);
692                             buffer.append(encoded.substring(offset,offset+i+1));
693                         }
694                         else
695                             buffer.append(c);
696                     }
697                     else if (c=='+')
698                     {
699                         if (buffer==null)
700                         {
701                             buffer=new StringBuffer(length);
702                             buffer.append(encoded.substring(offset,offset+i));
703                         }
704                         
705                         buffer.append(' ');
706                     }
707                     else if (c=='%' && (i+2)<length)
708                     {
709                         if (buffer==null)
710                         {
711                             buffer=new StringBuffer(length);
712                             buffer.append(encoded.substring(offset,offset+i));
713                         }
714 
715                         byte[] ba=new byte[length];
716                         int n=0;
717                         while(c>=0 && c<=0xff)
718                         {
719                             if (c=='%')
720                             {   
721                                 if(i+2<length)
722                                 {
723                                     try
724                                     {
725                                         ba[n++]=(byte)TypeUtil.parseInt(encoded,offset+i+1,2,16);
726                                         i+=3;
727                                     }
728                                     catch(NumberFormatException nfe)
729                                     {                                        
730                                         ba[n-1] = (byte)'%';                                    
731                                         for(char next; ((next=encoded.charAt(++i+offset))!='%');)
732                                             ba[n++] = (byte)(next=='+' ? ' ' : next);
733                                     }
734                                 }
735                                 else
736                                 {
737                                     ba[n++] = (byte)'%';
738                                     i++;
739                                 }
740                             }
741                             else if (c=='+')
742                             {
743                                 ba[n++]=(byte)' ';
744                                 i++;
745                             }
746                             else
747                             {
748                                 ba[n++]=(byte)c;
749                                 i++;
750                             }
751                             
752                             if (i>=length)
753                                 break;
754                             c = encoded.charAt(offset+i);
755                         }
756 
757                         i--;
758                         buffer.append(new String(ba,0,n,charset));
759 
760                     }
761                     else if (buffer!=null)
762                         buffer.append(c);
763                 }
764 
765                 if (buffer==null)
766                 {
767                     if (offset==0 && encoded.length()==length)
768                         return encoded;
769                     return encoded.substring(offset,offset+length);
770                 }
771 
772                 return buffer.toString();
773             }
774             catch (UnsupportedEncodingException e)
775             {
776                 throw new RuntimeException(e);
777             }
778         }
779         
780     }
781     
782     /* ------------------------------------------------------------ */
783     /** Perform URL encoding.
784      * @param string 
785      * @return encoded string.
786      */
787     public static String encodeString(String string)
788     {
789         return encodeString(string,ENCODING);
790     }
791     
792     /* ------------------------------------------------------------ */
793     /** Perform URL encoding.
794      * @param string 
795      * @return encoded string.
796      */
797     public static String encodeString(String string,String charset)
798     {
799         if (charset==null)
800             charset=ENCODING;
801         byte[] bytes=null;
802         try
803         {
804             bytes=string.getBytes(charset);
805         }
806         catch(UnsupportedEncodingException e)
807         {
808             // Log.warn(LogSupport.EXCEPTION,e);
809             bytes=string.getBytes();
810         }
811         
812         int len=bytes.length;
813         byte[] encoded= new byte[bytes.length*3];
814         int n=0;
815         boolean noEncode=true;
816         
817         for (int i=0;i<len;i++)
818         {
819             byte b = bytes[i];
820             
821             if (b==' ')
822             {
823                 noEncode=false;
824                 encoded[n++]=(byte)'+';
825             }
826             else if (b>='a' && b<='z' ||
827                      b>='A' && b<='Z' ||
828                      b>='0' && b<='9')
829             {
830                 encoded[n++]=b;
831             }
832             else
833             {
834                 noEncode=false;
835                 encoded[n++]=(byte)'%';
836                 byte nibble= (byte) ((b&0xf0)>>4);
837                 if (nibble>=10)
838                     encoded[n++]=(byte)('A'+nibble-10);
839                 else
840                     encoded[n++]=(byte)('0'+nibble);
841                 nibble= (byte) (b&0xf);
842                 if (nibble>=10)
843                     encoded[n++]=(byte)('A'+nibble-10);
844                 else
845                     encoded[n++]=(byte)('0'+nibble);
846             }
847         }
848 
849         if (noEncode)
850             return string;
851         
852         try
853         {    
854             return new String(encoded,0,n,charset);
855         }
856         catch(UnsupportedEncodingException e)
857         {
858             // Log.warn(LogSupport.EXCEPTION,e);
859             return new String(encoded,0,n);
860         }
861     }
862 
863 
864     /* ------------------------------------------------------------ */
865     /** 
866      */
867     public Object clone()
868     {
869         return new UrlEncoded(this);
870     }
871 }