View Javadoc

1   //========================================================================
2   //Copyright 2006 Mort Bay Consulting Pty. Ltd.
3   //------------------------------------------------------------------------
4   //Licensed under the Apache License, Version 2.0 (the "License");
5   //you may not use this file except in compliance with the License.
6   //You may obtain a copy of the License at 
7   //http://www.apache.org/licenses/LICENSE-2.0
8   //Unless required by applicable law or agreed to in writing, software
9   //distributed under the License is distributed on an "AS IS" BASIS,
10  //WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11  //See the License for the specific language governing permissions and
12  //limitations under the License.
13  //========================================================================
14  
15  package org.mortbay.util;
16  
/* ------------------------------------------------------------ */
/** UTF-8 StringBuffer.
 *
 * This class wraps a standard {@link java.lang.StringBuffer} and provides methods to append 
 * UTF-8 encoded bytes, which are converted into characters.
 * 
 * This class is stateful: up to 6 calls to {@link #append(byte)} may be needed before 
 * a character is appended to the string buffer.
 * 
 * Malformed input never throws. Each malformed byte or interrupted sequence is replaced
 * by a single <code>'?'</code> in the buffer and is reported by {@link #isError()}.
 * Code points above U+FFFF are appended as a UTF-16 surrogate pair; decoded values above 
 * U+10FFFF (reachable only through 4, 5 and 6 byte forms) are treated as malformed.
 * Overlong encodings and encoded surrogates are not rejected.
 * 
 * The UTF-8 decoding is done by this class and no additional buffers or Readers are used.
 * The UTF-8 code was inspired by http://javolution.org
 * 
 */
public class Utf8StringBuffer 
{
    /** Destination for decoded characters. */
    StringBuffer _buffer;
    /** Number of continuation bytes still expected for the sequence in progress. */
    int _more;
    /** Payload bits accumulated so far for the sequence in progress. */
    int _bits;
    /** Set once any malformed input has been replaced by '?'; cleared by {@link #reset()}. */
    boolean _errors;
    
    /* ------------------------------------------------------------ */
    /** Create a buffer with the default {@link StringBuffer} capacity.
     */
    public Utf8StringBuffer()
    {
        _buffer=new StringBuffer();
    }
    
    /* ------------------------------------------------------------ */
    /** Create a buffer with an initial capacity.
     * @param capacity initial capacity, in chars, of the wrapped {@link StringBuffer}
     */
    public Utf8StringBuffer(int capacity)
    {
        _buffer=new StringBuffer(capacity);
    }

    /* ------------------------------------------------------------ */
    /** Append a range of UTF-8 encoded bytes.
     * Decoding state is carried across calls, so a multi-byte sequence may be split
     * between two invocations.
     * @param b the array holding the encoded bytes
     * @param offset index of the first byte to decode
     * @param length number of bytes to decode
     */
    public void append(byte[] b,int offset, int length)
    {
        int end=offset+length;
        for (int i=offset; i<end;i++)
            append(b[i]);
    }
    
    /* ------------------------------------------------------------ */
    /** Append a single UTF-8 encoded byte.
     * A character is added to the buffer only once the final byte of its sequence
     * has been seen.
     * @param b the next byte of the UTF-8 stream
     */
    public void append(byte b)
    {
        if (b>=0)
        {
            // 0xxxxxxx
            if (_more>0)
                // ASCII arrived while continuation bytes were still owed; the
                // interrupted sequence and this byte are replaced by one '?'.
                malformed();
            else
                _buffer.append((char)(0x7f&b));
        }
        else if (_more==0)
        {
            // Expecting the first byte of a multi-byte sequence.
            if ((b&0xc0)!=0xc0)
            {
                // 10xxxxxx : continuation byte with no sequence in progress
                malformed();
            }
            else if ((b & 0xe0) == 0xc0)
            {
                //110xxxxx
                _more=1;
                _bits=b&0x1f;
            }
            else if ((b & 0xf0) == 0xe0)
            {
                //1110xxxx
                _more=2;
                _bits=b&0x0f;
            }
            else if ((b & 0xf8) == 0xf0)
            {
                //11110xxx
                _more=3;
                _bits=b&0x07;
            }
            else if ((b & 0xfc) == 0xf8)
            {
                //111110xx
                _more=4;
                _bits=b&0x03;
            }
            else if ((b & 0xfe) == 0xfc) 
            {
                //1111110x
                _more=5;
                _bits=b&0x01;
            }
            else
            {
                // 1111111x : 0xFE and 0xFF are not valid lead bytes
                malformed();
            }
        }
        else
        {
            if ((b&0xc0)==0xc0)
            {    
                // 11?????? : a new lead byte arrived before the sequence completed
                malformed();
            }
            else
            {
                // 10xxxxxx
                _bits=(_bits<<6)|(b&0x3f);
                if (--_more==0)
                    appendCodePoint(_bits);
            }
        }
    }
    
    /* ------------------------------------------------------------ */
    /** Record malformed input: append '?', drop any partial sequence and flag the error.
     */
    private void malformed()
    {
        _buffer.append('?');
        _more=0;
        _bits=0;
        _errors=true;
    }
    
    /* ------------------------------------------------------------ */
    /** Append a fully decoded code point as one char or as a surrogate pair.
     * @param codePoint the decoded value; never negative as at most 31 bits are accumulated
     */
    private void appendCodePoint(int codePoint)
    {
        if (codePoint<=0xffff)
            _buffer.append((char)codePoint);
        else if (codePoint<=0x10ffff)
        {
            // A plain (char) cast would truncate; split into high and low surrogates.
            int offset=codePoint-0x10000;
            _buffer.append((char)(0xd800|(offset>>10)));
            _buffer.append((char)(0xdc00|(offset&0x3ff)));
        }
        else
            // Beyond the Unicode range: cannot be represented in UTF-16.
            malformed();
    }
    
    /* ------------------------------------------------------------ */
    /** 
     * @return the number of chars decoded so far; bytes of an incomplete sequence are not counted.
     */
    public int length()
    {
        return _buffer.length();
    }
    
    /* ------------------------------------------------------------ */
    /** Empty the buffer and clear all decoding and error state.
     */
    public void reset()
    {
        _buffer.setLength(0);
        _more=0;
        _bits=0;
        _errors=false;
    }
    
    /* ------------------------------------------------------------ */
    /** 
     * @return the wrapped {@link StringBuffer} itself, not a copy.
     */
    public StringBuffer getStringBuffer()
    {
        return _buffer;
    }
    
    /* ------------------------------------------------------------ */
    /** 
     * @return the characters decoded so far.
     */
    public String toString()
    {
        return _buffer.toString();
    }
    
    /* ------------------------------------------------------------ */
    /** 
     * @return True if there are non UTF-8 characters or incomplete UTF-8 characters in the buffer.
     */
    public boolean isError()
    {
        return _errors || _more>0;
    }
}