Merge "upd: remove 'browser install'"

[gigi.git] / lib / jetty / org / eclipse / jetty / util / StringUtil.java
diff --git a/lib/jetty/org/eclipse/jetty/util/StringUtil.java b/lib/jetty/org/eclipse/jetty/util/StringUtil.java

index 55868ad5edd9039d36e33e3094dd27729a18ffc4..9dbd9e49b73daf30b271938f40a5e3a05e3387ba 100644 (file)
--- a/lib/jetty/org/eclipse/jetty/util/StringUtil.java
+++ b/lib/jetty/org/eclipse/jetty/util/StringUtil.java
@@ -1,6 +1,6 @@
  //
  //  ========================================================================
-//  Copyright (c) 1995-2014 Mort Bay Consulting Pty. Ltd.
+//  Copyright (c) 1995-2016 Mort Bay Consulting Pty. Ltd.
  //  ------------------------------------------------------------------------
  //  All rights reserved. This program and the accompanying materials
  //  are made available under the terms of the Eclipse Public License v1.0
@@ -21,6 +21,8 @@ package org.eclipse.jetty.util;
  import java.io.UnsupportedEncodingException;
  import java.nio.charset.Charset;
  import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.List;
  
  import org.eclipse.jetty.util.log.Log;
  import org.eclipse.jetty.util.log.Logger;
@@ -43,8 +45,10 @@ public class StringUtil
      
      public static final String ALL_INTERFACES="0.0.0.0";
      public static final String CRLF="\015\012";
-    public static final String __LINE_SEPARATOR=
-        System.getProperty("line.separator","\n");
+    
+    /** @deprecated use {@link System#lineSeparator()} instead */
+    @Deprecated
+    public static final String __LINE_SEPARATOR = System.lineSeparator();
         
      public static final String __ISO_8859_1="ISO-8859-1";
      public final static String __UTF8="UTF-8";
@@ -370,6 +374,53 @@ public class StringUtil
          }
      }
  
+    /**
+     * Find the index of the first control character in a String
+     * <p>
+     * This will return a result on the first occurrence of a control character, regardless of
+     * whether there is more than one.
+     * </p>
+     * <p>
+     * Note: uses codepoint version of {@link Character#isISOControl(int)} to support Unicode better.
+     * </p>
+     *
+     * <pre>
+     *   indexOfControlChars(null)      == -1
+     *   indexOfControlChars("")        == -1
+     *   indexOfControlChars("\r\n")    == 0
+     *   indexOfControlChars("\t")      == 0
+     *   indexOfControlChars("   ")     == -1
+     *   indexOfControlChars("a")       == -1
+     *   indexOfControlChars(".")       == -1
+     *   indexOfControlChars(";\n")     == 1
+     *   indexOfControlChars("abc\f")   == 3
+     *   indexOfControlChars("z\010")   == 1
+     *   indexOfControlChars(":\u001c") == 1
+     * </pre>
+     *
+     * @param str
+     *            the string to test.
+     * @return the index of first control character in string, -1 if no control characters encountered
+     */
+    public static int indexOfControlChars(String str)
+    {
+        if (str == null)
+        {
+            return -1;
+        }
+        int len = str.length();
+        for (int i = 0; i < len; i++)
+        {
+            if (Character.isISOControl(str.codePointAt(i)))
+            {
+                // found a control character, we can stop searching now
+                return i;
+            }
+        }
+        // no control characters
+        return -1;
+    }
+
      /* ------------------------------------------------------------ */
      /**
       * Test if a string is null or only has whitespace characters in it.
@@ -719,6 +770,11 @@ public class StringUtil
          return str.substring(0,maxSize);
      }
  
+    /**
+    * Parse the string representation of a list using {@link #csvSplit(List,String,int,int)}
+    * @param s The string to parse, expected to be enclosed as '[...]'
+    * @return An array of parsed values.
+    */
      public static String[] arrayFromString(String s) 
      {
          if (s==null)
@@ -729,7 +785,249 @@ public class StringUtil
          if (s.length()==2)
              return new String[]{};
  
-        return s.substring(1,s.length()-1).split(" *, *");
+        return csvSplit(s,1,s.length()-2);
+    }
+    
+    /**
+    * Parse a CSV string using {@link #csvSplit(List,String, int, int)}
+    * @param s The string to parse
+    * @return An array of parsed values.
+    */
+    public static String[] csvSplit(String s)
+    {
+        if (s==null)
+            return null;
+        return csvSplit(s,0,s.length());
+    }
+    
+    /**
+     * Parse a CSV string using {@link #csvSplit(List,String, int, int)}
+     * @param s The string to parse
+     * @param off The offset into the string to start parsing
+     * @param len The number of characters to parse, starting at {@code off}
+     * @return An array of parsed values.
+     */
+    public static String[] csvSplit(String s, int off,int len)
+    {
+        if (s==null)
+            return null;
+        if (off<0 || len<0 || off>s.length())
+            throw new IllegalArgumentException();
+
+        List<String> list = new ArrayList<>();
+        csvSplit(list,s,off,len);
+        return list.toArray(new String[list.size()]);
+    }
+
+    enum CsvSplitState { PRE_DATA, QUOTE, SLOSH, DATA, WHITE, POST_DATA };
+
+    /** Split a quoted comma separated string to a list
+     * <p>Handle <a href="https://www.ietf.org/rfc/rfc4180.txt">rfc4180</a>-like 
+     * CSV strings, with the exceptions:<ul>
+     * <li>quoted values may contain double quotes escaped with back-slash
+     * <li>Non-quoted values are trimmed of leading and trailing white space
+     * <li>trailing commas are ignored
+     * <li>double commas result in an empty string value
+     * </ul>  
+     * @param list The Collection to split to (or null to get a new list)
+     * @param s The string to parse
+     * @param off The offset into the string to start parsing
+     * @param len The number of characters to parse, starting at {@code off}
+     * @return list containing the parsed list values
+     */
+    public static List<String> csvSplit(List<String> list,String s, int off,int len)
+    {
+        if (list==null)
+            list=new ArrayList<>();
+        CsvSplitState state = CsvSplitState.PRE_DATA;
+        StringBuilder out = new StringBuilder();
+        int last=-1;
+        while (len>0)
+        {
+            char ch = s.charAt(off++);
+            len--;
+            
+            switch(state)
+            {
+                case PRE_DATA:
+                    if (Character.isWhitespace(ch))
+                        continue;
+
+                    if ('"'==ch)
+                    {
+                        state=CsvSplitState.QUOTE;
+                        continue;
+                    }
+                    
+                    if (','==ch)
+                    {
+                        list.add("");
+                        continue;
+                    }
+
+                    state=CsvSplitState.DATA;
+                    out.append(ch);
+                    continue;
+
+                case DATA:
+                    if (Character.isWhitespace(ch))
+                    {
+                        last=out.length();
+                        out.append(ch);
+                        state=CsvSplitState.WHITE;
+                        continue;
+                    }
+                    
+                    if (','==ch)
+                    {
+                        list.add(out.toString());
+                        out.setLength(0);
+                        state=CsvSplitState.PRE_DATA;
+                        continue;
+                    }
+
+                    out.append(ch);
+                    continue;
+                    
+                case WHITE:
+                    if (Character.isWhitespace(ch))
+                    {
+                        out.append(ch);
+                        continue;
+                    }
+                    
+                    if (','==ch)
+                    {
+                        out.setLength(last);
+                        list.add(out.toString());
+                        out.setLength(0);
+                        state=CsvSplitState.PRE_DATA;
+                        continue;
+                    }
+                    
+                    state=CsvSplitState.DATA;
+                    out.append(ch);
+                    last=-1;
+                    continue;
+
+                case QUOTE:
+                    if ('\\'==ch)
+                    {
+                        state=CsvSplitState.SLOSH;
+                        continue;
+                    }
+                    if ('"'==ch)
+                    {
+                        list.add(out.toString());
+                        out.setLength(0);
+                        state=CsvSplitState.POST_DATA;
+                        continue;
+                    }
+                    out.append(ch);
+                    continue;
+                    
+                case SLOSH:
+                    out.append(ch);
+                    state=CsvSplitState.QUOTE;
+                    continue;
+                    
+                case POST_DATA:
+                    if (','==ch)
+                    {
+                        state=CsvSplitState.PRE_DATA;
+                        continue;
+                    }
+                    continue;
+            }
+        }
+
+        switch(state)
+        {
+            case PRE_DATA:
+            case POST_DATA:
+                break;
+
+            case DATA:
+            case QUOTE:
+            case SLOSH:
+                list.add(out.toString());
+                break;
+                
+            case WHITE:
+                out.setLength(last);
+                list.add(out.toString());
+                break;
+        }
+        
+        return list;
+    }
+
+    public static String sanitizeXmlString(String html)
+    {
+        if (html==null)
+            return null;
+        
+        int i=0;
+        
+        // Are there any characters that need sanitizing?
+        loop: for (;i<html.length();i++)
+        {
+            char c=html.charAt(i);
+
+            switch(c)
+            {
+                case '&' :
+                case '<' :
+                case '>' :
+                case '\'':
+                case '"':
+                    break loop;
+
+                default:
+                    if (Character.isISOControl(c) && !Character.isWhitespace(c))
+                        break loop;
+            }
+        }
+
+        // No characters need sanitizing, so return original string
+        if (i==html.length())
+            return html;
+        
+        // Create builder with OK content so far 
+        StringBuilder out = new StringBuilder(html.length()*4/3);
+        out.append(html,0,i);
+        
+        // sanitize remaining content
+        for (;i<html.length();i++)
+        {
+            char c=html.charAt(i);
+
+            switch(c)
+            {
+                case '&' :
+                    out.append("&amp;");
+                    break;
+                case '<' :
+                    out.append("&lt;");
+                    break;
+                case '>' :
+                    out.append("&gt;");
+                    break;
+                case '\'':
+                    out.append("&apos;");
+                    break;
+                case '"':
+                    out.append("&quot;");
+                    break;
+
+                default:
+                    if (Character.isISOControl(c) && !Character.isWhitespace(c))
+                        out.append('?');
+                    else
+                        out.append(c);
+            }
+        }
+        return out.toString();
      }
  
  }