1 /*
   2  * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package jdk.nashorn.internal.runtime;
  27 
  28 import java.util.LinkedList;
  29 import java.util.Stack;
  30 import java.util.StringTokenizer;
  31 
  32 /**
  33  * A string tokenizer that supports entries with quotes and nested quotes. If
  34  * the separators are quoted either by ' and ", or whatever quotes the user
  35  * supplies they will be ignored and considered part of another token
  36  */
  37 public final class QuotedStringTokenizer {
  38     private final LinkedList<String> tokens;
  39 
  40     private final char quotes[];
  41 
  42     /**
  43      * Constructor
  44      *
  45      * @param str string to tokenize
  46      */
  47     public QuotedStringTokenizer(final String str) {
  48         this(str, " ");
  49     }
  50 
  51     /**
  52      * Create a quoted string tokenizer
  53      *
  54      * @param str
  55      *            a string to tokenize
  56      * @param delim
  57      *            delimiters between tokens
  58      *
  59      */
  60     public QuotedStringTokenizer(final String str, final String delim) {
  61         this(str, delim, new char[] { '"', '\'' });
  62     }
  63 
  64     /**
  65      * Create a quoted string tokenizer
  66      *
  67      * @param str
  68      *            a string to tokenize
  69      * @param delim
  70      *            delimiters between tokens
  71      * @param quotes
  72      *            all the characters that should be accepted as quotes, default
  73      *            is ' or "
  74      */
  75     private QuotedStringTokenizer(final String str, final String delim, final char[] quotes) {
  76         this.quotes = quotes;
  77 
  78         boolean delimIsWhitespace = true;
  79         for (int i = 0; i < delim.length(); i++) {
  80             if (!Character.isWhitespace(delim.charAt(i))) {
  81                 delimIsWhitespace = false;
  82                 break;
  83             }
  84         }
  85 
  86         final StringTokenizer st = new StringTokenizer(str, delim);
  87         tokens = new LinkedList<>();
  88         while (st.hasMoreTokens()) {
  89             String token = st.nextToken();
  90 
  91             while (unmatchedQuotesIn(token)) {
  92                 if (!st.hasMoreTokens()) {
  93                     throw new IndexOutOfBoundsException(token);
  94                 }
  95                 token += (delimIsWhitespace ? " " : delim) + st.nextToken();
  96             }
  97             tokens.add(stripQuotes(token));
  98         }
  99     }
 100 
 101     /**
 102      * @return the number of tokens in the tokenizer
 103      */
 104     public int countTokens() {
 105         return tokens.size();
 106     }
 107 
 108     /**
 109      * @return true if there are tokens left
 110      */
 111     public boolean hasMoreTokens() {
 112         return countTokens() > 0;
 113     }
 114 
 115     /**
 116      * @return the next token in the tokenizer
 117      */
 118     public String nextToken() {
 119         return tokens.removeFirst();
 120     }
 121 
 122     private String stripQuotes(final String value0) {
 123         String value = value0.trim();
 124         for (final char q : quotes) {
 125             if (value.length() >= 2 && value.startsWith("" + q) && value.endsWith("" + q)) {
 126                 // also go over the value and remove \q sequences. they are just
 127                 // plain q now
 128                 value = value.substring(1, value.length() - 1);
 129                 value = value.replace("\\" + q, "" + q);
 130             }
 131         }
 132         return value;
 133     }
 134 
 135     private boolean unmatchedQuotesIn(final String str) {
 136         final Stack<Character> quoteStack = new Stack<>();
 137         for (int i = 0; i < str.length(); i++) {
 138             final char c = str.charAt(i);
 139             for (final char q : this.quotes) {
 140                 if (c == q) {
 141                     if (quoteStack.isEmpty()) {
 142                         quoteStack.push(c);
 143                     } else {
 144                         final char top = quoteStack.pop();
 145                         if (top != c) {
 146                             quoteStack.push(top);
 147                             quoteStack.push(c);
 148                         }
 149                     }
 150                 }
 151             }
 152         }
 153 
 154         return !quoteStack.isEmpty();
 155     }
 156 }