]> WPIA git - gigi.git/blobdiff - lib/jetty/org/eclipse/jetty/util/Utf8Appendable.java
Importing upstream Jetty jetty-9.2.1.v20140609
[gigi.git] / lib / jetty / org / eclipse / jetty / util / Utf8Appendable.java
diff --git a/lib/jetty/org/eclipse/jetty/util/Utf8Appendable.java b/lib/jetty/org/eclipse/jetty/util/Utf8Appendable.java
new file mode 100644 (file)
index 0000000..ff58764
--- /dev/null
@@ -0,0 +1,256 @@
+//
+//  ========================================================================
+//  Copyright (c) 1995-2014 Mort Bay Consulting Pty. Ltd.
+//  ------------------------------------------------------------------------
+//  All rights reserved. This program and the accompanying materials
+//  are made available under the terms of the Eclipse Public License v1.0
+//  and Apache License v2.0 which accompanies this distribution.
+//
+//      The Eclipse Public License is available at
+//      http://www.eclipse.org/legal/epl-v10.html
+//
+//      The Apache License v2.0 is available at
+//      http://www.opensource.org/licenses/apache2.0.php
+//
+//  You may elect to redistribute this code under either of these licenses.
+//  ========================================================================
+//
+
+package org.eclipse.jetty.util;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import org.eclipse.jetty.util.log.Log;
+import org.eclipse.jetty.util.log.Logger;
+
+/* ------------------------------------------------------------ */
+/**
+ * Utf8 Appendable abstract base class
+ *
+ * This abstract class wraps a standard {@link java.lang.Appendable} and provides methods to append UTF-8 encoded bytes, that are converted into characters.
+ *
+ * This class is stateful and up to 4 calls to {@link #append(byte)} may be needed before a character is appended to the string buffer.
+ *
+ * The UTF-8 decoding is done by this class and no additional buffers or Readers are used. The UTF-8 code was inspired by
+ * http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
+ *
+ * License information for Bjoern Hoehrmann's code:
+ *
+ * Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ **/
+public abstract class Utf8Appendable
+{
+    protected static final Logger LOG = Log.getLogger(Utf8Appendable.class);
+
+    /** Character appended in place of a byte sequence that is not valid UTF-8. */
+    public static final char REPLACEMENT = '\ufffd';
+
+    /** The UTF-8 encoding of {@link #REPLACEMENT}. */
+    public static final byte[] REPLACEMENT_UTF8 = new byte[] {(byte)0xEF,(byte)0xBF,(byte)0xBD };
+
+    // Decoder states are row offsets into TRANS_TABLE (12 entries per row).
+    private static final int UTF8_ACCEPT = 0;
+    private static final int UTF8_REJECT = 12;
+
+    /** The target that receives the decoded characters. */
+    protected final Appendable _appendable;
+
+    /** Current decoder state; {@code UTF8_ACCEPT} when no multi-byte sequence is in progress. */
+    protected int _state = UTF8_ACCEPT;
+
+    private static final byte[] BYTE_TABLE =
+    {
+        // The first part of the table maps bytes to character classes, which
+        // reduces the size of the transition table and creates bitmasks.
+         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+         1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+         7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+         8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+        10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8
+    };
+
+    private static final byte[] TRANS_TABLE =
+    {
+        // The second part is a transition table that maps a combination
+        // of a state of the automaton and a character class to a state.
+         0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
+        12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
+        12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
+        12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
+        12,36,12,12,12,12,12,12,12,12,12,12
+    };
+
+    /** Code point accumulated so far for the sequence currently being decoded. */
+    private int _codep;
+
+    /**
+     * Creates a decoder that appends decoded characters to the given target.
+     *
+     * @param appendable the target that receives the decoded characters
+     */
+    public Utf8Appendable(Appendable appendable)
+    {
+        _appendable = appendable;
+    }
+
+    /**
+     * @return the current length as defined by the subclass; it is compared against
+     *         {@code maxChars} in {@link #append(byte[], int, int, int)}
+     */
+    public abstract int length();
+
+    /**
+     * Returns the decoder to its initial state, discarding any partially decoded sequence.
+     * Nothing is appended to, or removed from, the underlying appendable.
+     */
+    protected void reset()
+    {
+        // Clear the accumulator as well, as every other recovery path in this class does.
+        _codep = 0;
+        _state = UTF8_ACCEPT;
+    }
+
+    /**
+     * Decodes a single UTF-8 byte.
+     *
+     * @param b the byte to decode
+     * @throws NotUtf8Exception if the byte is not valid at this point of the UTF-8 stream
+     * @throws RuntimeException wrapping any {@link IOException} thrown by the underlying appendable
+     */
+    public void append(byte b)
+    {
+        try
+        {
+            appendByte(b);
+        }
+        catch (IOException e)
+        {
+            throw new RuntimeException(e);
+        }
+    }
+
+    /**
+     * Decodes all remaining bytes of the buffer, advancing its position.
+     *
+     * @param buf the buffer to consume
+     * @throws NotUtf8Exception if an invalid byte is encountered; bytes after it are left in the buffer
+     * @throws RuntimeException wrapping any {@link IOException} thrown by the underlying appendable
+     */
+    public void append(ByteBuffer buf)
+    {
+        try
+        {
+            while (buf.remaining() > 0)
+            {
+                appendByte(buf.get());
+            }
+        }
+        catch (IOException e)
+        {
+            throw new RuntimeException(e);
+        }
+    }
+
+    /**
+     * Decodes {@code length} bytes of {@code b}, starting at {@code offset}.
+     *
+     * @param b the array holding the UTF-8 bytes
+     * @param offset index of the first byte to decode
+     * @param length number of bytes to decode
+     * @throws NotUtf8Exception if an invalid byte is encountered
+     * @throws RuntimeException wrapping any {@link IOException} thrown by the underlying appendable
+     */
+    public void append(byte[] b, int offset, int length)
+    {
+        try
+        {
+            int end = offset + length;
+            for (int i = offset; i < end; i++)
+                appendByte(b[i]);
+        }
+        catch (IOException e)
+        {
+            throw new RuntimeException(e);
+        }
+    }
+
+    /**
+     * Decodes up to {@code length} bytes of {@code b}, starting at {@code offset}, giving up
+     * once {@link #length()} has grown beyond {@code maxChars}.
+     * <p>
+     * The limit is tested before each byte is consumed, so decoding only stops after
+     * {@link #length()} already exceeds {@code maxChars}, and a limit that is first exceeded
+     * by the final byte is not reported.
+     *
+     * @param b the array holding the UTF-8 bytes
+     * @param offset index of the first byte to decode
+     * @param length number of bytes to decode
+     * @param maxChars the limit that {@link #length()} is compared against
+     * @return {@code true} if every byte was consumed, {@code false} if decoding stopped early
+     * @throws NotUtf8Exception if an invalid byte is encountered
+     * @throws RuntimeException wrapping any {@link IOException} thrown by the underlying appendable
+     */
+    public boolean append(byte[] b, int offset, int length, int maxChars)
+    {
+        try
+        {
+            int end = offset + length;
+            for (int i = offset; i < end; i++)
+            {
+                if (length() > maxChars)
+                    return false;
+                appendByte(b[i]);
+            }
+            return true;
+        }
+        catch (IOException e)
+        {
+            throw new RuntimeException(e);
+        }
+    }
+
+    /**
+     * Feeds one byte to the decoder, appending a character to the underlying appendable
+     * whenever the byte completes a sequence.
+     *
+     * @param b the byte to decode
+     * @throws IOException if the underlying appendable fails
+     * @throws NotUtf8Exception if the byte is rejected; {@link #REPLACEMENT} has then been
+     *         appended and the decoder is back in the accept state
+     */
+    protected void appendByte(byte b) throws IOException
+    {
+        if (b > 0 && _state == UTF8_ACCEPT)
+        {
+            // Fast path: a positive byte outside any multi-byte sequence is appended as is.
+            _appendable.append((char)(b & 0xFF));
+        }
+        else
+        {
+            int i = b & 0xFF;
+            int type = BYTE_TABLE[i];
+            // At the start of a sequence the character class is used as a shift that masks
+            // off the high bits of the byte; inside a sequence the low 6 bits of the byte
+            // are shifted into the code point accumulated so far.
+            _codep = _state == UTF8_ACCEPT ? (0xFF >> type) & i : (i & 0x3F) | (_codep << 6);
+            int next = TRANS_TABLE[_state + type];
+
+            switch(next)
+            {
+                case UTF8_ACCEPT:
+                    _state=next;
+                    if (_codep < Character.MIN_HIGH_SURROGATE)
+                    {
+                        _appendable.append((char)_codep);
+                    }
+                    else
+                    {
+                        // Character.toChars yields a surrogate pair for supplementary code points.
+                        for (char c : Character.toChars(_codep))
+                            _appendable.append(c);
+                    }
+                    break;
+
+                case UTF8_REJECT:
+                    // Build the message first: it reports the state the decoder was in
+                    // before it is reset below.
+                    String reason = "byte "+TypeUtil.toHexString(b)+" in state "+(_state/12);
+                    _codep=0;
+                    _state = UTF8_ACCEPT;
+                    _appendable.append(REPLACEMENT);
+                    throw new NotUtf8Exception(reason);
+
+                default:
+                    // Still inside a multi-byte sequence: remember where we are.
+                    _state=next;
+
+            }
+        }
+    }
+
+    /**
+     * @return {@code true} if the decoder is in the accept state, i.e. the bytes seen so far
+     *         do not end in the middle of a multi-byte sequence
+     */
+    public boolean isUtf8SequenceComplete()
+    {
+        return _state == UTF8_ACCEPT;
+    }
+
+    /**
+     * Thrown when the decoded bytes are not valid UTF-8.
+     */
+    @SuppressWarnings("serial")
+    public static class NotUtf8Exception extends IllegalArgumentException
+    {
+        /**
+         * @param reason detail appended to the fixed "Not valid UTF8! " message prefix
+         */
+        public NotUtf8Exception(String reason)
+        {
+            super("Not valid UTF8! "+reason);
+        }
+    }
+
+    /**
+     * Verifies that the input did not end in the middle of a multi-byte sequence.
+     * If it did, {@link #REPLACEMENT} is appended and the decoder is returned to the
+     * accept state before the exception is thrown.
+     *
+     * @throws NotUtf8Exception if a multi-byte sequence is incomplete
+     * @throws RuntimeException wrapping any {@link IOException} thrown by the underlying appendable
+     */
+    protected void checkState()
+    {
+        if (!isUtf8SequenceComplete())
+        {
+            replaceIncompleteSequence();
+            throw new NotUtf8Exception("incomplete UTF8 sequence");
+        }
+    }
+
+    /**
+     * Returns the string form of the underlying appendable. An incomplete trailing
+     * sequence is replaced by {@link #REPLACEMENT} and logged rather than reported
+     * with an exception.
+     *
+     * @return {@code toString()} of the underlying appendable
+     * @throws RuntimeException wrapping any {@link IOException} thrown by the underlying appendable
+     */
+    public String toReplacedString()
+    {
+        if (!isUtf8SequenceComplete())
+        {
+            replaceIncompleteSequence();
+            Throwable th= new NotUtf8Exception("incomplete UTF8 sequence");
+            LOG.warn(th.toString());
+            LOG.debug(th);
+        }
+        return _appendable.toString();
+    }
+
+    /**
+     * Abandons the partially decoded sequence: clears the decoder state and appends
+     * {@link #REPLACEMENT} in its place.
+     */
+    private void replaceIncompleteSequence()
+    {
+        _codep=0;
+        _state = UTF8_ACCEPT;
+        try
+        {
+            _appendable.append(REPLACEMENT);
+        }
+        catch(IOException e)
+        {
+            throw new RuntimeException(e);
+        }
+    }
+}