2 // ========================================================================
3 // Copyright (c) 1995-2016 Mort Bay Consulting Pty. Ltd.
4 // ------------------------------------------------------------------------
5 // All rights reserved. This program and the accompanying materials
6 // are made available under the terms of the Eclipse Public License v1.0
7 // and Apache License v2.0 which accompanies this distribution.
9 // The Eclipse Public License is available at
10 // http://www.eclipse.org/legal/epl-v10.html
12 // The Apache License v2.0 is available at
13 // http://www.opensource.org/licenses/apache2.0.php
15 // You may elect to redistribute this code under either of these licenses.
16 // ========================================================================
19 package org.eclipse.jetty.util;
21 import java.io.UnsupportedEncodingException;
22 import java.nio.charset.Charset;
23 import java.nio.charset.StandardCharsets;
24 import java.util.ArrayList;
25 import java.util.List;
27 import org.eclipse.jetty.util.log.Log;
28 import org.eclipse.jetty.util.log.Logger;
/** Fast String Utilities.
 *
 * These string utilities provide both convenience methods and
 * performance improvements over most standard library versions. The
 * main aim of the optimizations is to avoid object creation unless
 * absolutely required.
 */
39 public class StringUtil
private static final Logger LOG = Log.getLogger(StringUtil.class);

/** Lookup from accepted charset spellings to the canonical names below. */
private final static Trie<String> CHARSETS= new ArrayTrie<>(256);

public static final String ALL_INTERFACES="0.0.0.0";
public static final String CRLF="\015\012";

/** @deprecated use {@link System#lineSeparator()} instead */
@Deprecated
public static final String __LINE_SEPARATOR = System.lineSeparator();

public static final String __ISO_8859_1="ISO-8859-1";
public final static String __UTF8="UTF-8";
public final static String __UTF16="UTF-16";

/**
 * @deprecated Use {@link StandardCharsets#UTF_8}
 */
@Deprecated
public final static Charset __UTF8_CHARSET=StandardCharsets.UTF_8;

/**
 * @deprecated Use {@link StandardCharsets#ISO_8859_1}
 */
@Deprecated
public final static Charset __ISO_8859_1_CHARSET=StandardCharsets.ISO_8859_1;

/**
 * @deprecated Use {@link StandardCharsets#UTF_16}
 */
@Deprecated
public final static Charset __UTF16_CHARSET=StandardCharsets.UTF_16;

/**
 * @deprecated Use {@link StandardCharsets#US_ASCII}
 */
@Deprecated
public final static Charset __US_ASCII_CHARSET=StandardCharsets.US_ASCII;

// Register each accepted spelling against its canonical charset name.
static
{
    CHARSETS.put("UTF-8",__UTF8);
    CHARSETS.put("UTF8",__UTF8);
    CHARSETS.put("UTF-16",__UTF16);
    CHARSETS.put("UTF16",__UTF16);
    CHARSETS.put("ISO-8859-1",__ISO_8859_1);
    CHARSETS.put("ISO_8859_1",__ISO_8859_1);
}
88 /* ------------------------------------------------------------ */
89 /** Convert alternate charset names (eg utf8) to normalized
92 public static String normalizeCharset(String s)
94 String n=CHARSETS.get(s);
98 /* ------------------------------------------------------------ */
99 /** Convert alternate charset names (eg utf8) to normalized
102 public static String normalizeCharset(String s,int offset,int length)
104 String n=CHARSETS.get(s,offset,length);
105 return (n==null)?s.substring(offset,offset+length):n;
109 /* ------------------------------------------------------------ */
110 public static final char[] lowercases = {
111 '\000','\001','\002','\003','\004','\005','\006','\007',
112 '\010','\011','\012','\013','\014','\015','\016','\017',
113 '\020','\021','\022','\023','\024','\025','\026','\027',
114 '\030','\031','\032','\033','\034','\035','\036','\037',
115 '\040','\041','\042','\043','\044','\045','\046','\047',
116 '\050','\051','\052','\053','\054','\055','\056','\057',
117 '\060','\061','\062','\063','\064','\065','\066','\067',
118 '\070','\071','\072','\073','\074','\075','\076','\077',
119 '\100','\141','\142','\143','\144','\145','\146','\147',
120 '\150','\151','\152','\153','\154','\155','\156','\157',
121 '\160','\161','\162','\163','\164','\165','\166','\167',
122 '\170','\171','\172','\133','\134','\135','\136','\137',
123 '\140','\141','\142','\143','\144','\145','\146','\147',
124 '\150','\151','\152','\153','\154','\155','\156','\157',
125 '\160','\161','\162','\163','\164','\165','\166','\167',
126 '\170','\171','\172','\173','\174','\175','\176','\177' };
128 /* ------------------------------------------------------------ */
130 * fast lower case conversion. Only works on ascii (not unicode)
131 * @param s the string to convert
132 * @return a lower case version of s
134 public static String asciiToLowerCase(String s)
139 // look for first conversion
145 char c2=lowercases[c1];
158 c[i] = lowercases[c[i]];
161 return c==null?s:new String(c);
165 /* ------------------------------------------------------------ */
166 public static boolean startsWithIgnoreCase(String s,String w)
171 if (s==null || s.length()<w.length())
174 for (int i=0;i<w.length();i++)
191 /* ------------------------------------------------------------ */
192 public static boolean endsWithIgnoreCase(String s,String w)
206 for (int i=wl;i-->0;)
208 char c1=s.charAt(--sl);
223 /* ------------------------------------------------------------ */
225 * returns the next index of a character from the chars string
227 public static int indexFrom(String s,String chars)
229 for (int i=0;i<s.length();i++)
230 if (chars.indexOf(s.charAt(i))>=0)
235 /* ------------------------------------------------------------ */
237 * replace substrings within string.
239 public static String replace(String s, String sub, String with)
242 int i=s.indexOf(sub,c);
246 StringBuilder buf = new StringBuilder(s.length()+with.length());
250 buf.append(s.substring(c,i));
253 } while ((i=s.indexOf(sub,c))!=-1);
256 buf.append(s.substring(c,s.length()));
258 return buf.toString();
263 /* ------------------------------------------------------------ */
264 /** Remove single or double quotes.
266 public static String unquote(String s)
268 return QuotedStringTokenizer.unquote(s);
272 /* ------------------------------------------------------------ */
273 /** Append substring to StringBuilder
274 * @param buf StringBuilder to append to
275 * @param s String to append from
276 * @param offset The offset of the substring
277 * @param length The length of the substring
279 public static void append(StringBuilder buf,
286 int end=offset+length;
287 for (int i=offset; i<end;i++)
291 buf.append(s.charAt(i));
297 /* ------------------------------------------------------------ */
302 public static void append(StringBuilder buf,byte b,int base)
305 int c='0'+(bi/base)%base;
315 /* ------------------------------------------------------------ */
316 public static void append2digits(StringBuffer buf,int i)
320 buf.append((char)(i/10+'0'));
321 buf.append((char)(i%10+'0'));
325 /* ------------------------------------------------------------ */
326 public static void append2digits(StringBuilder buf,int i)
330 buf.append((char)(i/10+'0'));
331 buf.append((char)(i%10+'0'));
335 /* ------------------------------------------------------------ */
336 /** Return a non null string.
338 * @return The string passed in or empty string if it is null.
340 public static String nonNull(String s)
347 /* ------------------------------------------------------------ */
348 public static boolean equals(String s,char[] buf, int offset, int length)
350 if (s.length()!=length)
352 for (int i=0;i<length;i++)
353 if (buf[offset+i]!=s.charAt(i))
358 /* ------------------------------------------------------------ */
359 public static String toUTF8String(byte[] b,int offset,int length)
361 return new String(b,offset,length,StandardCharsets.UTF_8);
364 /* ------------------------------------------------------------ */
365 public static String toString(byte[] b,int offset,int length,String charset)
369 return new String(b,offset,length,charset);
371 catch (UnsupportedEncodingException e)
373 throw new IllegalArgumentException(e);
378 * Find the index of a control characters in String
380 * This will return a result on the first occurrence of a control character, regardless if
381 * there are more than one.
384 * Note: uses codepoint version of {@link Character#isISOControl(int)} to support Unicode better.
388 * indexOfControlChars(null) == -1
389 * indexOfControlChars("") == -1
390 * indexOfControlChars("\r\n") == 0
391 * indexOfControlChars("\t") == 0
392 * indexOfControlChars(" ") == -1
393 * indexOfControlChars("a") == -1
394 * indexOfControlChars(".") == -1
395 * indexOfControlChars(";\n") == 1
396 * indexOfControlChars("abc\f") == 3
397 * indexOfControlChars("z\010") == 1
398 * indexOfControlChars(":\u001c") == 1
402 * the string to test.
403 * @return the index of first control character in string, -1 if no control characters encountered
405 public static int indexOfControlChars(String str)
411 int len = str.length();
412 for (int i = 0; i < len; i++)
414 if (Character.isISOControl(str.codePointAt(i)))
416 // found a control character, we can stop searching now
420 // no control characters
424 /* ------------------------------------------------------------ */
426 * Test if a string is null or only has whitespace characters in it.
428 * Note: uses codepoint version of {@link Character#isWhitespace(int)} to support Unicode better.
431 * isBlank(null) == true
432 * isBlank("") == true
433 * isBlank("\r\n") == true
434 * isBlank("\t") == true
435 * isBlank(" ") == true
436 * isBlank("a") == false
437 * isBlank(".") == false
438 * isBlank(";\n") == false
442 * the string to test.
443 * @return true if string is null or only whitespace characters, false if non-whitespace characters encountered.
445 public static boolean isBlank(String str)
451 int len = str.length();
452 for (int i = 0; i < len; i++)
454 if (!Character.isWhitespace(str.codePointAt(i)))
456 // found a non-whitespace, we can stop searching now
464 /* ------------------------------------------------------------ */
466 * Test if a string is not null and contains at least 1 non-whitespace characters in it.
468 * Note: uses codepoint version of {@link Character#isWhitespace(int)} to support Unicode better.
471 * isNotBlank(null) == false
472 * isNotBlank("") == false
473 * isNotBlank("\r\n") == false
474 * isNotBlank("\t") == false
475 * isNotBlank(" ") == false
476 * isNotBlank("a") == true
477 * isNotBlank(".") == true
478 * isNotBlank(";\n") == true
482 * the string to test.
483 * @return true if string is not null and has at least 1 non-whitespace character, false if null or all-whitespace characters.
485 public static boolean isNotBlank(String str)
491 int len = str.length();
492 for (int i = 0; i < len; i++)
494 if (!Character.isWhitespace(str.codePointAt(i)))
496 // found a non-whitespace, we can stop searching now
504 /* ------------------------------------------------------------ */
505 public static boolean isUTF8(String charset)
507 return __UTF8.equalsIgnoreCase(charset)||__UTF8.equalsIgnoreCase(normalizeCharset(charset));
511 /* ------------------------------------------------------------ */
512 public static String printable(String name)
516 StringBuilder buf = new StringBuilder(name.length());
517 for (int i=0;i<name.length();i++)
519 char c=name.charAt(i);
520 if (!Character.isISOControl(c))
523 return buf.toString();
526 /* ------------------------------------------------------------ */
527 public static String printable(byte[] b)
529 StringBuilder buf = new StringBuilder();
530 for (int i=0;i<b.length;i++)
533 if (Character.isWhitespace(c)|| c>' ' && c<0x7f)
538 TypeUtil.toHex(b[i],buf);
541 return buf.toString();
544 public static byte[] getBytes(String s)
546 return s.getBytes(StandardCharsets.ISO_8859_1);
549 public static byte[] getUtf8Bytes(String s)
551 return s.getBytes(StandardCharsets.UTF_8);
554 public static byte[] getBytes(String s,String charset)
558 return s.getBytes(charset);
570 * Converts a binary SID to a string SID
572 * http://en.wikipedia.org/wiki/Security_Identifier
574 * S-1-IdentifierAuthority-SubAuthority1-SubAuthority2-...-SubAuthorityn
576 public static String sidBytesToString(byte[] sidBytes)
578 StringBuilder sidString = new StringBuilder();
580 // Identify this as a SID
581 sidString.append("S-");
583 // Add SID revision level (expect 1 but may change someday)
584 sidString.append(Byte.toString(sidBytes[0])).append('-');
586 StringBuilder tmpBuilder = new StringBuilder();
588 // crunch the six bytes of issuing authority value
589 for (int i = 2; i <= 7; ++i)
591 tmpBuilder.append(Integer.toHexString(sidBytes[i] & 0xFF));
594 sidString.append(Long.parseLong(tmpBuilder.toString(), 16)); // '-' is in the subauth loop
596 // the number of subAuthorities we need to attach
597 int subAuthorityCount = sidBytes[1];
599 // attach each of the subAuthorities
600 for (int i = 0; i < subAuthorityCount; ++i)
603 tmpBuilder.setLength(0);
604 // these need to be zero padded hex and little endian
605 tmpBuilder.append(String.format("%02X%02X%02X%02X",
606 (sidBytes[11 + offset] & 0xFF),
607 (sidBytes[10 + offset] & 0xFF),
608 (sidBytes[9 + offset] & 0xFF),
609 (sidBytes[8 + offset] & 0xFF)));
610 sidString.append('-').append(Long.parseLong(tmpBuilder.toString(), 16));
613 return sidString.toString();
617 * Converts a string SID to a binary SID
619 * http://en.wikipedia.org/wiki/Security_Identifier
621 * S-1-IdentifierAuthority-SubAuthority1-SubAuthority2-...-SubAuthorityn
623 public static byte[] sidStringToBytes( String sidString )
625 String[] sidTokens = sidString.split("-");
627 int subAuthorityCount = sidTokens.length - 3; // S-Rev-IdAuth-
630 byte[] sidBytes = new byte[1 + 1 + 6 + (4 * subAuthorityCount)];
633 sidBytes[byteCount++] = (byte)Integer.parseInt(sidTokens[1]);
635 // the # of sub authorities byte
636 sidBytes[byteCount++] = (byte)subAuthorityCount;
639 String hexStr = Long.toHexString(Long.parseLong(sidTokens[2]));
641 while( hexStr.length() < 12) // pad to 12 characters
643 hexStr = "0" + hexStr;
646 // place the certAuthority 6 bytes
647 for ( int i = 0 ; i < hexStr.length(); i = i + 2)
649 sidBytes[byteCount++] = (byte)Integer.parseInt(hexStr.substring(i, i + 2),16);
653 for ( int i = 3; i < sidTokens.length ; ++i)
655 hexStr = Long.toHexString(Long.parseLong(sidTokens[i]));
657 while( hexStr.length() < 8) // pad to 8 characters
659 hexStr = "0" + hexStr;
662 // place the inverted sub authorities, 4 bytes each
663 for ( int j = hexStr.length(); j > 0; j = j - 2)
665 sidBytes[byteCount++] = (byte)Integer.parseInt(hexStr.substring(j-2, j),16);
674 * Convert String to an integer. Parses up to the first non-numeric character. If no number is found an IllegalArgumentException is thrown
677 * A String containing an integer.
680 public static int toInt(String string)
683 boolean started = false;
684 boolean minus = false;
686 for (int i = 0; i < string.length(); i++)
688 char b = string.charAt(i);
694 else if (b >= '0' && b <= '9')
696 val = val * 10 + (b - '0');
699 else if (b == '-' && !started)
708 return minus?(-val):val;
709 throw new NumberFormatException(string);
713 * Convert String to an long. Parses up to the first non-numeric character. If no number is found an IllegalArgumentException is thrown
716 * A String containing an integer.
719 public static long toLong(String string)
722 boolean started = false;
723 boolean minus = false;
725 for (int i = 0; i < string.length(); i++)
727 char b = string.charAt(i);
733 else if (b >= '0' && b <= '9')
735 val = val * 10L + (b - '0');
738 else if (b == '-' && !started)
747 return minus?(-val):val;
748 throw new NumberFormatException(string);
752 * Truncate a string to a max size.
754 * @param str the string to possibly truncate
755 * @param maxSize the maximum size of the string
756 * @return the truncated string. if <code>str</code> param is null, then the returned string will also be null.
758 public static String truncate(String str, int maxSize)
765 if (str.length() <= maxSize)
770 return str.substring(0,maxSize);
774 * Parse the string representation of a list using {@link #csvSplit(List,String,int,int)}
775 * @param s The string to parse, expected to be enclosed as '[...]'
776 * @return An array of parsed values.
778 public static String[] arrayFromString(String s)
781 return new String[]{};
783 if (!s.startsWith("[") || !s.endsWith("]"))
784 throw new IllegalArgumentException();
786 return new String[]{};
788 return csvSplit(s,1,s.length()-2);
792 * Parse a CSV string using {@link #csvSplit(List,String, int, int)}
793 * @param s The string to parse
794 * @return An array of parsed values.
796 public static String[] csvSplit(String s)
800 return csvSplit(s,0,s.length());
804 * Parse a CSV string using {@link #csvSplit(List,String, int, int)}
805 * @param s The string to parse
806 * @param off The offset into the string to start parsing
807 * @param len The len in characters to parse
808 * @return An array of parsed values.
810 public static String[] csvSplit(String s, int off,int len)
814 if (off<0 || len<0 || off>s.length())
815 throw new IllegalArgumentException();
817 List<String> list = new ArrayList<>();
818 csvSplit(list,s,off,len);
819 return list.toArray(new String[list.size()]);
822 enum CsvSplitState { PRE_DATA, QUOTE, SLOSH, DATA, WHITE, POST_DATA };
824 /** Split a quoted comma separated string to a list
825 * <p>Handle <a href="https://www.ietf.org/rfc/rfc4180.txt">rfc4180</a>-like
826 * CSV strings, with the exceptions:<ul>
827 * <li>quoted values may contain double quotes escaped with back-slash
828 * <li>Non-quoted values are trimmed of leading trailing white space
829 * <li>trailing commas are ignored
830 * <li>double commas result in a empty string value
832 * @param list The Collection to split to (or null to get a new list)
833 * @param s The string to parse
834 * @param off The offset into the string to start parsing
835 * @param len The len in characters to parse
836 * @return list containing the parsed list values
838 public static List<String> csvSplit(List<String> list,String s, int off,int len)
841 list=new ArrayList<>();
842 CsvSplitState state = CsvSplitState.PRE_DATA;
843 StringBuilder out = new StringBuilder();
847 char ch = s.charAt(off++);
853 if (Character.isWhitespace(ch))
858 state=CsvSplitState.QUOTE;
868 state=CsvSplitState.DATA;
873 if (Character.isWhitespace(ch))
877 state=CsvSplitState.WHITE;
883 list.add(out.toString());
885 state=CsvSplitState.PRE_DATA;
893 if (Character.isWhitespace(ch))
902 list.add(out.toString());
904 state=CsvSplitState.PRE_DATA;
908 state=CsvSplitState.DATA;
916 state=CsvSplitState.SLOSH;
921 list.add(out.toString());
923 state=CsvSplitState.POST_DATA;
931 state=CsvSplitState.QUOTE;
937 state=CsvSplitState.PRE_DATA;
953 list.add(out.toString());
958 list.add(out.toString());
965 public static String sanitizeXmlString(String html)
972 // Are there any characters that need sanitizing?
973 loop: for (;i<html.length();i++)
975 char c=html.charAt(i);
987 if (Character.isISOControl(c) && !Character.isWhitespace(c))
992 // No characters need sanitizing, so return original string
993 if (i==html.length())
996 // Create builder with OK content so far
997 StringBuilder out = new StringBuilder(html.length()*4/3);
998 out.append(html,0,i);
1000 // sanitize remaining content
1001 for (;i<html.length();i++)
1003 char c=html.charAt(i);
1008 out.append("&");
1017 out.append("'");
1020 out.append(""");
1024 if (Character.isISOControl(c) && !Character.isWhitespace(c))
1030 return out.toString();